 */

// [START dataproc_quickstart]
+/* This quickstart sample walks a user through creating a Cloud Dataproc
+ * cluster, submitting a PySpark job from Google Cloud Storage to the
+ * cluster, reading the output of the job and deleting the cluster, all
+ * using the Java client library.
+ *
+ * Usage:
+ *   mvn clean package -DskipTests
+ *
+ *   mvn exec:java -Dexec.args="<PROJECT_ID> <REGION> <CLUSTER_NAME> <GCS_JOB_FILE_PATH>"
+ *
+ * You can also set these arguments in the main function instead of providing them via the CLI.
+ */
+
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.dataproc.v1.Cluster;
import com.google.cloud.dataproc.v1.ClusterConfig;
@@ -60,15 +73,6 @@ public static Job waitForJobCompletion(
    }
  }

-  public static void quickstart() throws IOException, InterruptedException {
-    // TODO(developer): Replace these variables before running the sample.
-    String projectId = "your-project-id";
-    String region = "your-project-region";
-    String clusterName = "your-cluster-name";
-    String jobFilePath = "your-job-file-path";
-    quickstart(projectId, region, clusterName, jobFilePath);
-  }
-
  public static void quickstart(
      String projectId, String region, String clusterName, String jobFilePath)
      throws IOException, InterruptedException {
@@ -82,10 +86,10 @@ public static void quickstart(
    JobControllerSettings jobControllerSettings =
        JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();

-    // Create both a cluster controller client and job controller client with the configured
-    // settings. The client only needs to be created once and can be reused for multiple requests.
-    // Using a try-with-resources closes the client, but this can also be done manually with
-    // the .close() method.
+    // Create both a cluster controller client and job controller client with the
+    // configured settings. The client only needs to be created once and can be reused for
+    // multiple requests. Using a try-with-resources closes the client, but this can also be done
+    // manually with the .close() method.
    try (ClusterControllerClient clusterControllerClient =
            ClusterControllerClient.create(clusterControllerSettings);
        JobControllerClient jobControllerClient =
@@ -114,7 +118,8 @@ public static void quickstart(
      OperationFuture<Cluster, ClusterOperationMetadata> createClusterAsyncRequest =
          clusterControllerClient.createClusterAsync(projectId, region, cluster);
      Cluster response = createClusterAsyncRequest.get();
-      System.out.printf("Cluster created successfully: %s", response.getClusterName());
+      System.out.println(
+          String.format("Cluster created successfully: %s", response.getClusterName()));

      // Configure the settings for our job.
      JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
@@ -133,7 +138,7 @@ public static void quickstart(
      int timeout = 10;
      try {
        Job jobInfo = finishedJobFuture.get(timeout, TimeUnit.MINUTES);
-        System.out.printf("Job %s finished successfully.", jobId);
+        System.out.println(String.format("Job %s finished successfully.", jobId));

        // Cloud Dataproc job output gets saved to a GCS bucket allocated to it.
        Cluster clusterInfo = clusterControllerClient.getCluster(projectId, region, clusterName);
@@ -163,5 +168,21 @@ public static void quickstart(
      System.err.println(String.format("Error executing quickstart: %s ", e.getMessage()));
    }
  }
+
+  public static void main(String... args) throws IOException, InterruptedException {
+    if (args.length != 4) {
+      System.err.println(
+          "Insufficient number of parameters provided. Please make sure a "
+              + "PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order.");
+      return;
+    }
+
+    String projectId = args[0]; // project-id of project to create the cluster in
+    String region = args[1]; // region to create the cluster
+    String clusterName = args[2]; // name of the cluster
+    String jobFilePath = args[3]; // location in GCS of the PySpark job
+
+    quickstart(projectId, region, clusterName, jobFilePath);
+  }
}
// [END dataproc_quickstart]
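
For reference, below is a minimal sketch of invoking the new four-argument quickstart(...) overload from another class, as an alternative to the mvn exec:java usage described in the header comment. It assumes the class in this diff is named Quickstart and is visible from the caller; the project ID, region, cluster name, and GCS path values are placeholders, not values from the diff.

// Hypothetical caller; the class name "Quickstart" and all argument values
// are assumptions for illustration only.
public class QuickstartRunner {
  public static void main(String[] args) throws Exception {
    String projectId = "my-project-id"; // GCP project to create the cluster in
    String region = "us-central1"; // Dataproc region for the cluster
    String clusterName = "my-quickstart-cluster"; // name for the new cluster
    String jobFilePath = "gs://my-bucket/jobs/hello_world.py"; // PySpark job file in GCS

    // Delegates to the quickstart(...) overload shown in the diff, which creates the
    // cluster, submits the PySpark job, prints its output, and deletes the cluster.
    Quickstart.quickstart(projectId, region, clusterName, jobFilePath);
  }
}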