Commit b135c5b

feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig`, update protos (#417)

* feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig` messages.

This change includes the following updates:
1. There is a new `temp_bucket` field for clusters.
2. There is a new `endpoint_config` field for clusters.
3. There is a new `preemptibility` field for instance group configs.
4. There are various updates to the doc comments.

PiperOrigin-RevId: 323829608

Source-Author: Google APIs <noreply@google.com>
Source-Date: Wed Jul 29 11:26:43 2020 -0700
Source-Repo: googleapis/googleapis
Source-Sha: d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5
Source-Link: googleapis/googleapis@d8a3dfb
yoshi-automation authored Jul 31, 2020
1 parent 0944c31 commit b135c5b
Showing 9 changed files with 656 additions and 41 deletions.
@@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];

- // Required. Fraction of average pending memory in the last cooldown period
+ // Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
+ // See [How autoscaling
+ // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

- // Required. Fraction of average pending memory in the last cooldown period
+ // Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
+ // See [How autoscaling
+ // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
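[Editor's note: to make the factor semantics concrete, here is a small illustration, not part of the commit. It approximates the documented behavior by scaling the average pending YARN memory by the factor and converting the result to a worker count; the memory figures and per-worker capacity are hypothetical.]

# Illustration of the documented scale-up semantics (not the actual
# autoscaler implementation). Documented bounds for the factor: [0.0, 1.0].

def workers_to_add(avg_pending_memory_mb: int,
                   scale_up_factor: float,
                   memory_per_worker_mb: int) -> int:
    """Estimate workers needed to absorb pending YARN memory."""
    if not 0.0 <= scale_up_factor <= 1.0:
        raise ValueError("scale_up_factor must be within [0.0, 1.0]")
    return round(avg_pending_memory_mb * scale_up_factor / memory_per_worker_mb)

# 64 GiB pending, 16 GiB per worker: a factor of 1.0 adds 4 workers
# (no pending memory remains); a factor of 0.5 adds only 2.
print(workers_to_add(65536, 1.0, 16384))  # 4
print(workers_to_add(65536, 0.5, 16384))  # 2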
@@ -170,6 +170,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

+ // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
+ // such as Spark and MapReduce history files.
+ // If you do not specify a temp bucket,
+ // Dataproc will determine a Cloud Storage location (US,
+ // ASIA, or EU) for your cluster's temp bucket according to the
+ // Compute Engine zone where your cluster is deployed, and then create
+ // and manage this project-level, per-location bucket. The default bucket has
+ // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+ // bucket.
+ string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
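[Editor's note: a usage sketch for the new field, not part of the commit, assuming a Python client generated from these protos, such as google-cloud-dataproc at a version that includes this change. The bucket names are hypothetical.]

from google.cloud import dataproc_v1

# Supplying temp_bucket opts out of the Dataproc-managed default
# bucket (which has a 90-day TTL); any TTL, or none, is then allowed.
config = dataproc_v1.ClusterConfig(
    config_bucket="my-staging-bucket",  # hypothetical staging bucket
    temp_bucket="my-temp-bucket",       # hypothetical; new in this change
)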
@@ -216,6 +227,20 @@ message ClusterConfig {

// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];

+ // Optional. Port/endpoint configuration for this cluster
+ EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
}

+ // Endpoint config for this cluster
+ message EndpointConfig {
+ // Output only. The map of port descriptions to URLs. Will only be populated
+ // if enable_http_port_access is true.
+ map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+ // Optional. If true, enable http access to specific ports on the cluster
+ // from external sources. Defaults to false.
+ bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
+ }

// Autoscaling Policy config associated with the cluster.
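[Editor's note: the EndpointConfig message above can be exercised like so; an editorial sketch, not part of the commit, under the same assumed Python client.]

from google.cloud import dataproc_v1

# Request side: opt in to external HTTP access. The http_ports map is
# OUTPUT_ONLY and is filled in by the service on the returned cluster.
config = dataproc_v1.ClusterConfig(
    endpoint_config=dataproc_v1.EndpointConfig(enable_http_port_access=True),
)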
@@ -288,7 +313,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
- // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+ // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -332,6 +357,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
+ // Controls the use of
+ // [preemptible instances]
+ // (https://cloud.google.com/compute/docs/instances/preemptible)
+ // within the group.
+ enum Preemptibility {
+ // Preemptibility is unspecified, the system will choose the
+ // appropriate setting for each instance group.
+ PREEMPTIBILITY_UNSPECIFIED = 0;
+
+ // Instances are non-preemptible.
+ //
+ // This option is allowed for all instance groups and is the only valid
+ // value for Master and Worker instance groups.
+ NON_PREEMPTIBLE = 1;
+
+ // Instances are preemptible.
+ //
+ // This option is allowed only for secondary worker groups.
+ PREEMPTIBLE = 2;
+ }

// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -382,6 +428,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

+ // Optional. Specifies the preemptibility of the instance group.
+ //
+ // The default value for master and worker groups is
+ // `NON_PREEMPTIBLE`. This default cannot be changed.
+ //
+ // The default value for secondary instances is
+ // `PREEMPTIBLE`.
+ Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
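[Editor's note: a sketch of the new enum in use, not part of the commit, again assuming the generated Python client. Per the comments above, PREEMPTIBLE is valid only for the secondary worker group.]

from google.cloud import dataproc_v1

# Master and primary worker groups must remain NON_PREEMPTIBLE;
# only a secondary worker group may be marked PREEMPTIBLE.
secondary_workers = dataproc_v1.InstanceGroupConfig(
    num_instances=4,
    preemptibility=dataproc_v1.InstanceGroupConfig.Preemptibility.PREEMPTIBLE,
)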
@@ -608,7 +663,7 @@ message KerberosConfig {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
- // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+ // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
@@ -224,12 +224,12 @@ message SparkJob {
// Spark driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory
- // of Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
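[Editor's note: the reworded comments pin down where job inputs land: files are placed, and archives extracted, in each executor's working directory. A sketch, not part of the commit, with hypothetical URIs.]

from google.cloud import dataproc_v1

# file_uris are copied into each executor's working directory;
# archive_uris (.jar, .tar, .tar.gz, .tgz, .zip) are extracted there.
spark_job = dataproc_v1.SparkJob(
    jar_file_uris=["gs://my-bucket/libs/helper.jar"],
    file_uris=["gs://my-bucket/data/lookup.csv"],
    archive_uris=["gs://my-bucket/deps/native-libs.zip"],
)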

@@ -265,11 +265,12 @@ message PySparkJob {
// Python driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Python drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -414,12 +415,12 @@ message SparkRJob {
// occur that causes an incorrect job submission.
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // R drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
- // Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

@@ -565,9 +566,9 @@ message JobStatus {

// Encapsulates the full scoping used to reference a job.
message JobReference {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+ // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
+ // specified, must match the request project ID.
+ string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. The job ID, which must be unique within the project.
//
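[Editor's note: with project_id relaxed from REQUIRED to OPTIONAL, a reference can carry just a job ID and inherit the project from the request. An editorial sketch, not part of the commit; the job ID is hypothetical, and Job.reference is a field outside this hunk.]

from google.cloud import dataproc_v1

# project_id may now be omitted; if it is set, it must match the
# project ID of the surrounding request.
job = dataproc_v1.Job(
    reference=dataproc_v1.JobReference(job_id="word-count-20200731"),
)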
@@ -25,7 +25,7 @@ option java_package = "com.google.cloud.dataproc.v1";

// Cluster components that can be activated.
enum Component {
- // Unspecified component.
+ // Unspecified component. Specifying this will cause Cluster creation to fail.
COMPONENT_UNSPECIFIED = 0;

// The Anaconda python distribution.
@@ -238,7 +238,7 @@ message WorkflowTemplate {
// Required. The Directed Acyclic Graph of Jobs to submit.
repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED];

- // Optional. emplate parameters whose values are substituted into the
+ // Optional. Template parameters whose values are substituted into the
// template. Values for parameters must be provided when the template is
// instantiated.
repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL];
@@ -319,23 +319,29 @@ message OrderedJob {

// Required. The job definition.
oneof job_type {
- HadoopJob hadoop_job = 2;
+ // Optional. Job is a Hadoop job.
+ HadoopJob hadoop_job = 2 [(google.api.field_behavior) = OPTIONAL];

- SparkJob spark_job = 3;
+ // Optional. Job is a Spark job.
+ SparkJob spark_job = 3 [(google.api.field_behavior) = OPTIONAL];

- PySparkJob pyspark_job = 4;
+ // Optional. Job is a PySpark job.
+ PySparkJob pyspark_job = 4 [(google.api.field_behavior) = OPTIONAL];

- HiveJob hive_job = 5;
+ // Optional. Job is a Hive job.
+ HiveJob hive_job = 5 [(google.api.field_behavior) = OPTIONAL];

- PigJob pig_job = 6;
+ // Optional. Job is a Pig job.
+ PigJob pig_job = 6 [(google.api.field_behavior) = OPTIONAL];

- // Spark R job
- SparkRJob spark_r_job = 11;
+ // Optional. Job is a SparkR job.
+ SparkRJob spark_r_job = 11 [(google.api.field_behavior) = OPTIONAL];

- SparkSqlJob spark_sql_job = 7;
+ // Optional. Job is a SparkSql job.
+ SparkSqlJob spark_sql_job = 7 [(google.api.field_behavior) = OPTIONAL];

- // Presto job
- PrestoJob presto_job = 12;
+ // Optional. Job is a Presto job.
+ PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL];
}

// Optional. The labels to associate with this job.
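[Editor's note: each oneof variant is now annotated and documented. Setting exactly one variant per step might look like this; an editorial sketch, not part of the commit. The step_id field and the R file URI are hypothetical, and step_id sits outside this hunk.]

from google.cloud import dataproc_v1

# Exactly one job_type variant may be set on an OrderedJob.
step = dataproc_v1.OrderedJob(
    step_id="analyze",  # assumed OrderedJob field, not shown in this hunk
    spark_r_job=dataproc_v1.SparkRJob(
        main_r_file_uri="gs://my-bucket/scripts/analyze.R",
    ),
)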