Commit b135c5b

feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig`, update protos (#417)

* feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig` messages.

This change includes the following updates:
1. There is a new `temp_bucket` field for clusters.
2. There is a new `endpoint_config` field for clusters.
3. There is a new `preemptibility` field for instance group configs.
4. There are various updates to the doc comments.

PiperOrigin-RevId: 323829608

Source-Author: Google APIs <noreply@google.com>
Source-Date: Wed Jul 29 11:26:43 2020 -0700
Source-Repo: googleapis/googleapis
Source-Sha: d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5
Source-Link: googleapis/googleapis@d8a3dfb
yoshi-automation authored Jul 31, 2020
1 parent 0944c31 commit b135c5b
Showing 9 changed files with 656 additions and 41 deletions.
@@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];

- // Required. Fraction of average pending memory in the last cooldown period
+ // Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
+ // See [How autoscaling
+ // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

- // Required. Fraction of average pending memory in the last cooldown period
+ // Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
+ // See [How autoscaling
+ // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
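[Editor's note: to make the factor semantics concrete, here is a small illustration, not part of the commit. It approximates the documented behavior by scaling the average pending YARN memory by the factor and converting the result to a worker count; the memory figures and per-worker capacity are hypothetical.]

# Illustration of the documented scale-up semantics (not the actual
# autoscaler implementation). Documented bounds for the factor: [0.0, 1.0].

def workers_to_add(avg_pending_memory_mb: int,
                   scale_up_factor: float,
                   memory_per_worker_mb: int) -> int:
    """Estimate workers needed to absorb pending YARN memory."""
    if not 0.0 <= scale_up_factor <= 1.0:
        raise ValueError("scale_up_factor must be within [0.0, 1.0]")
    return round(avg_pending_memory_mb * scale_up_factor / memory_per_worker_mb)

# 64 GiB pending, 16 GiB per worker: a factor of 1.0 adds 4 workers
# (no pending memory remains); a factor of 0.5 adds only 2.
print(workers_to_add(65536, 1.0, 16384))  # 4
print(workers_to_add(65536, 0.5, 16384))  # 2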
@@ -170,6 +170,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

+ // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
+ // such as Spark and MapReduce history files.
+ // If you do not specify a temp bucket,
+ // Dataproc will determine a Cloud Storage location (US,
+ // ASIA, or EU) for your cluster's temp bucket according to the
+ // Compute Engine zone where your cluster is deployed, and then create
+ // and manage this project-level, per-location bucket. The default bucket has
+ // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+ // bucket.
+ string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
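[Editor's note: a usage sketch for the new field, not part of the commit, assuming a Python client generated from these protos, such as google-cloud-dataproc at a version that includes this change. The bucket names are hypothetical.]

from google.cloud import dataproc_v1

# Supplying temp_bucket opts out of the Dataproc-managed default
# bucket (which has a 90-day TTL); any TTL, or none, is then allowed.
config = dataproc_v1.ClusterConfig(
    config_bucket="my-staging-bucket",  # hypothetical staging bucket
    temp_bucket="my-temp-bucket",       # hypothetical; new in this change
)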
@@ -216,6 +227,20 @@ message ClusterConfig {

// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];

+ // Optional. Port/endpoint configuration for this cluster
+ EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
}

+ // Endpoint config for this cluster
+ message EndpointConfig {
+ // Output only. The map of port descriptions to URLs. Will only be populated
+ // if enable_http_port_access is true.
+ map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+ // Optional. If true, enable http access to specific ports on the cluster
+ // from external sources. Defaults to false.
+ bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
+ }

// Autoscaling Policy config associated with the cluster.
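[Editor's note: the EndpointConfig message above can be exercised like so; an editorial sketch, not part of the commit, under the same assumed Python client.]

from google.cloud import dataproc_v1

# Request side: opt in to external HTTP access. The http_ports map is
# OUTPUT_ONLY and is filled in by the service on the returned cluster.
config = dataproc_v1.ClusterConfig(
    endpoint_config=dataproc_v1.EndpointConfig(enable_http_port_access=True),
)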
@@ -288,7 +313,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
- // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+ // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -332,6 +357,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
+ // Controls the use of
+ // [preemptible instances]
+ // (https://cloud.google.com/compute/docs/instances/preemptible)
+ // within the group.
+ enum Preemptibility {
+ // Preemptibility is unspecified, the system will choose the
+ // appropriate setting for each instance group.
+ PREEMPTIBILITY_UNSPECIFIED = 0;
+
+ // Instances are non-preemptible.
+ //
+ // This option is allowed for all instance groups and is the only valid
+ // value for Master and Worker instance groups.
+ NON_PREEMPTIBLE = 1;
+
+ // Instances are preemptible.
+ //
+ // This option is allowed only for secondary worker groups.
+ PREEMPTIBLE = 2;
+ }

// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -382,6 +428,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

+ // Optional. Specifies the preemptibility of the instance group.
+ //
+ // The default value for master and worker groups is
+ // `NON_PREEMPTIBLE`. This default cannot be changed.
+ //
+ // The default value for secondary instances is
+ // `PREEMPTIBLE`.
+ Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
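[Editor's note: a sketch of the new enum in use, not part of the commit, again assuming the generated Python client. Per the comments above, PREEMPTIBLE is valid only for the secondary worker group.]

from google.cloud import dataproc_v1

# Master and primary worker groups must remain NON_PREEMPTIBLE;
# only a secondary worker group may be marked PREEMPTIBLE.
secondary_workers = dataproc_v1.InstanceGroupConfig(
    num_instances=4,
    preemptibility=dataproc_v1.InstanceGroupConfig.Preemptibility.PREEMPTIBLE,
)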
@@ -608,7 +663,7 @@ message KerberosConfig {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
- // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+ // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
@@ -224,12 +224,12 @@ message SparkJob {
// Spark driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory
- // of Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
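[Editor's note: the reworded comments pin down where job inputs land: files are placed, and archives extracted, in each executor's working directory. A sketch, not part of the commit, with hypothetical URIs.]

from google.cloud import dataproc_v1

# file_uris are copied into each executor's working directory;
# archive_uris (.jar, .tar, .tar.gz, .tgz, .zip) are extracted there.
spark_job = dataproc_v1.SparkJob(
    jar_file_uris=["gs://my-bucket/libs/helper.jar"],
    file_uris=["gs://my-bucket/data/lookup.csv"],
    archive_uris=["gs://my-bucket/deps/native-libs.zip"],
)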

@@ -265,11 +265,12 @@ message PySparkJob {
// Python driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Python drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -414,12 +415,12 @@ message SparkRJob {
// occur that causes an incorrect job submission.
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // R drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
- // Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

@@ -565,9 +566,9 @@ message JobStatus {

// Encapsulates the full scoping used to reference a job.
message JobReference {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+ // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
+ // specified, must match the request project ID.
+ string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. The job ID, which must be unique within the project.
//
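[Editor's note: with project_id relaxed from REQUIRED to OPTIONAL, a reference can carry just a job ID and inherit the project from the request. An editorial sketch, not part of the commit; the job ID is hypothetical, and Job.reference is a field outside this hunk.]

from google.cloud import dataproc_v1

# project_id may now be omitted; if it is set, it must match the
# project ID of the surrounding request.
job = dataproc_v1.Job(
    reference=dataproc_v1.JobReference(job_id="word-count-20200731"),
)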
@@ -25,7 +25,7 @@ option java_package = "com.google.cloud.dataproc.v1";

// Cluster components that can be activated.
enum Component {
- // Unspecified component.
+ // Unspecified component. Specifying this will cause Cluster creation to fail.
COMPONENT_UNSPECIFIED = 0;

// The Anaconda python distribution.
@@ -238,7 +238,7 @@ message WorkflowTemplate {
// Required. The Directed Acyclic Graph of Jobs to submit.
repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED];

- // Optional. emplate parameters whose values are substituted into the
+ // Optional. Template parameters whose values are substituted into the
// template. Values for parameters must be provided when the template is
// instantiated.
repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL];
@@ -319,23 +319,29 @@ message OrderedJob {

// Required. The job definition.
oneof job_type {
- HadoopJob hadoop_job = 2;
+ // Optional. Job is a Hadoop job.
+ HadoopJob hadoop_job = 2 [(google.api.field_behavior) = OPTIONAL];

- SparkJob spark_job = 3;
+ // Optional. Job is a Spark job.
+ SparkJob spark_job = 3 [(google.api.field_behavior) = OPTIONAL];

- PySparkJob pyspark_job = 4;
+ // Optional. Job is a PySpark job.
+ PySparkJob pyspark_job = 4 [(google.api.field_behavior) = OPTIONAL];

- HiveJob hive_job = 5;
+ // Optional. Job is a Hive job.
+ HiveJob hive_job = 5 [(google.api.field_behavior) = OPTIONAL];

- PigJob pig_job = 6;
+ // Optional. Job is a Pig job.
+ PigJob pig_job = 6 [(google.api.field_behavior) = OPTIONAL];

- // Spark R job
- SparkRJob spark_r_job = 11;
+ // Optional. Job is a SparkR job.
+ SparkRJob spark_r_job = 11 [(google.api.field_behavior) = OPTIONAL];

- SparkSqlJob spark_sql_job = 7;
+ // Optional. Job is a SparkSql job.
+ SparkSqlJob spark_sql_job = 7 [(google.api.field_behavior) = OPTIONAL];

- // Presto job
- PrestoJob presto_job = 12;
+ // Optional. Job is a Presto job.
+ PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL];
}

// Optional. The labels to associate with this job.
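[Editor's note: each oneof variant is now annotated and documented. Setting exactly one variant per step might look like this; an editorial sketch, not part of the commit. The step_id field and the R file URI are hypothetical, and step_id sits outside this hunk.]

from google.cloud import dataproc_v1

# Exactly one job_type variant may be set on an OrderedJob.
step = dataproc_v1.OrderedJob(
    step_id="analyze",  # assumed OrderedJob field, not shown in this hunk
    spark_r_job=dataproc_v1.SparkRJob(
        main_r_file_uri="gs://my-bucket/scripts/analyze.R",
    ),
)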