From ecf402e0d0addf49506cdd90182e6a7657aadafe Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Fri, 5 Mar 2021 12:06:08 +0100 Subject: [PATCH 1/4] initial work on the Azure Spot instances support --- CHANGELOG.md | 1 + compute/model.go | 60 ++++++++++++++++++++++++------------- compute/resource_cluster.go | 5 ++++ docs/resources/cluster.md | 34 ++++++++++++++++++++- 4 files changed, 79 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb4d5f3381..41359a8d8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## 0.3.2 * Fix incorrect escaping of notebook names ([#566](https://github.com/databrickslabs/terraform-provider-databricks/pull/566)) +* Added support for spot instances on Azure (TODO) ## 0.3.1 diff --git a/compute/model.go b/compute/model.go index 37d028009d..54a9c7c421 100644 --- a/compute/model.go +++ b/compute/model.go @@ -11,8 +11,8 @@ type AutoScale struct { MaxWorkers int32 `json:"max_workers,omitempty"` } -// AwsAvailability is a type for describing AWS availability on cluster nodes -type AwsAvailability string +// Availability is a type for describing AWS availability on cluster nodes +type Availability string const ( // AwsAvailabilitySpot is spot instance type for clusters @@ -24,6 +24,16 @@ const ( AwsAvailabilitySpotWithFallback = "SPOT_WITH_FALLBACK" ) +const ( + // AzureAvailabilitySpot is spot instance type for clusters + AzureAvailabilitySpot = "SPOT_AZURE" + // AzureAvailabilityOnDemand is OnDemand instance type for clusters + AzureAvailabilityOnDemand = "ON_DEMAND_AZURE" + // AzureAvailabilitySpotWithFallback is Spot instance type for clusters with option + // to fallback into on-demand if instance cannot be acquired + AzureAvailabilitySpotWithFallback = "SPOT_WITH_FALLBACK_AZURE" +) + // AzureDiskVolumeType is disk type on azure vms type AzureDiskVolumeType string @@ -112,14 +122,22 @@ type ZonesInfo struct { // AwsAttributes encapsulates the aws attributes for aws based clusters // 
https://docs.databricks.com/dev-tools/api/latest/clusters.html#clusterclusterattributes type AwsAttributes struct { - FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"` - Availability AwsAvailability `json:"availability,omitempty" tf:"computed"` - ZoneID string `json:"zone_id,omitempty" tf:"computed"` - InstanceProfileArn string `json:"instance_profile_arn,omitempty"` - SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty" tf:"computed"` - EbsVolumeType EbsVolumeType `json:"ebs_volume_type,omitempty" tf:"computed"` - EbsVolumeCount int32 `json:"ebs_volume_count,omitempty" tf:"computed"` - EbsVolumeSize int32 `json:"ebs_volume_size,omitempty" tf:"computed"` + FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"` + Availability Availability `json:"availability,omitempty" tf:"computed"` + ZoneID string `json:"zone_id,omitempty" tf:"computed"` + InstanceProfileArn string `json:"instance_profile_arn,omitempty"` + SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty" tf:"computed"` + EbsVolumeType EbsVolumeType `json:"ebs_volume_type,omitempty" tf:"computed"` + EbsVolumeCount int32 `json:"ebs_volume_count,omitempty" tf:"computed"` + EbsVolumeSize int32 `json:"ebs_volume_size,omitempty" tf:"computed"` +} + +// AzureAttributes encapsulates the Azure attributes for Azure based clusters +// TODO: add link to documentation after release +type AzureAttributes struct { + FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"` + Availability Availability `json:"availability,omitempty" tf:"computed"` + SpotBidMaxPrice float64 `json:"spot_bid_max_price,omitempty" tf:"computed"` } // DbfsStorageInfo contains the destination string for DBFS @@ -233,12 +251,13 @@ type Cluster struct { EnableElasticDisk bool `json:"enable_elastic_disk,omitempty" tf:"computed"` EnableLocalDiskEncryption bool `json:"enable_local_disk_encryption,omitempty"` - NodeTypeID string `json:"node_type_id,omitempty" 
tf:"group:node_type,computed"` - DriverNodeTypeID string `json:"driver_node_type_id,omitempty" tf:"conflicts:instance_pool_id,computed"` - InstancePoolID string `json:"instance_pool_id,omitempty" tf:"group:node_type"` - PolicyID string `json:"policy_id,omitempty"` - AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty" tf:"conflicts:instance_pool_id"` - AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"` + NodeTypeID string `json:"node_type_id,omitempty" tf:"group:node_type,computed"` + DriverNodeTypeID string `json:"driver_node_type_id,omitempty" tf:"conflicts:instance_pool_id,computed"` + InstancePoolID string `json:"instance_pool_id,omitempty" tf:"group:node_type"` + PolicyID string `json:"policy_id,omitempty"` + AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty" tf:"conflicts:instance_pool_id"` + AzureAttributes *AzureAttributes `json:"azure_attributes,omitempty" tf:"conflicts:instance_pool_id"` + AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"` SparkConf map[string]string `json:"spark_conf,omitempty"` SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"` @@ -272,6 +291,7 @@ type ClusterInfo struct { SparkVersion string `json:"spark_version"` SparkConf map[string]string `json:"spark_conf,omitempty"` AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty"` + AzureAttributes *AzureAttributes `json:"azure_attributes,omitempty"` NodeTypeID string `json:"node_type_id,omitempty"` DriverNodeTypeID string `json:"driver_node_type_id,omitempty"` SSHPublicKeys []string `json:"ssh_public_keys,omitempty"` @@ -285,7 +305,7 @@ type ClusterInfo struct { InstancePoolID string `json:"instance_pool_id,omitempty"` PolicyID string `json:"policy_id,omitempty"` SingleUserName string `json:"single_user_name,omitempty"` - ClusterSource AwsAvailability `json:"cluster_source,omitempty"` + ClusterSource Availability `json:"cluster_source,omitempty"` DockerImage *DockerImage `json:"docker_image,omitempty"` State 
ClusterState `json:"state"` StateMessage string `json:"state_message,omitempty"` @@ -344,9 +364,9 @@ type Command struct { // InstancePoolAwsAttributes contains aws attributes for AWS Databricks deployments for instance pools type InstancePoolAwsAttributes struct { - Availability AwsAvailability `json:"availability,omitempty"` - ZoneID string `json:"zone_id"` - SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty"` + Availability Availability `json:"availability,omitempty"` + ZoneID string `json:"zone_id"` + SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty"` } // InstancePoolDiskType contains disk type information for each of the different cloud service providers diff --git a/compute/resource_cluster.go b/compute/resource_cluster.go index 1b524a3849..2167a8ee34 100644 --- a/compute/resource_cluster.go +++ b/compute/resource_cluster.go @@ -68,6 +68,8 @@ func resourceClusterSchema() map[string]*schema.Schema { Optional: true, Computed: true, } + s["aws_attributes"].ConflictsWith = []string{"azure_attributes"} + s["azure_attributes"].ConflictsWith = []string{"aws_attributes"} s["is_pinned"] = &schema.Schema{ Type: schema.TypeBool, Optional: true, @@ -321,6 +323,9 @@ func modifyClusterRequest(clusterModel *Cluster) { } clusterModel.AwsAttributes = &awsAttributes } + if clusterModel.AzureAttributes != nil { + clusterModel.AzureAttributes = nil + } clusterModel.EnableElasticDisk = false clusterModel.NodeTypeID = "" clusterModel.DriverNodeTypeID = "" diff --git a/docs/resources/cluster.md b/docs/resources/cluster.md index fea17a1309..64b5bf3b71 100644 --- a/docs/resources/cluster.md +++ b/docs/resources/cluster.md @@ -259,7 +259,7 @@ resource "databricks_cluster" "this" { The following options are available: * `zone_id` - (Required) Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like “us-west-2a”. 
The provided availability zone must be in the same region as the Databricks deployment. For example, “us-west-2a” is not a valid zone ID if the Databricks deployment resides in the “us-east-1” region. -* `availability` - (Optional) Availability type used for all subsequent nodes past the `first_on_demand` ones. Valid values are `SPOT` and `ON_DEMAND`. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. +* `availability` - (Optional) Availability type used for all subsequent nodes past the `first_on_demand` ones. Valid values are `SPOT`, `SPOT_WITH_FALLBACK` and `ON_DEMAND`. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. * `first_on_demand` - (Optional) The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. * `spot_bid_price_percent` - (Optional) The max price for AWS spot instances, as a percentage of the corresponding instance type’s on-demand price. For example, if this field is set to 50, and the cluster needs a new `i3.xlarge` spot instance, then the max price is half of the price of on-demand `i3.xlarge` instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand `i3.xlarge` instances. If not specified, the default value is `100`. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. 
For safety, we enforce this field to be no more than `10000`. * `instance_profile_arn` - (Optional) Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see [databricks_instance_profile](instance_profile.md) resource documentation for extended examples on adding a valid instance profile using Terraform. @@ -267,6 +267,38 @@ The following options are available: * `ebs_volume_count` - (Optional) The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, and etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden. * `ebs_volume_size` - (Optional) The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized). +## azure_attributes + +`azure_attributes` optional configuration block contains attributes related to [clusters running on Azure](TODO). + +-> **Note** *(Azure only)* Please specify empty configuration block (`azure_attributes {}`), even if you're not setting any custom values. This will prevent any resource update issues. 
+Here is the example of shared autoscaling cluster with some of Azure options set:
+// GcpAttributes encapsulates GCP-specific attributes
`json:"driver_node_type_id,omitempty"` SSHPublicKeys []string `json:"ssh_public_keys,omitempty"` diff --git a/compute/resource_cluster.go b/compute/resource_cluster.go index 2167a8ee34..defd4f652f 100644 --- a/compute/resource_cluster.go +++ b/compute/resource_cluster.go @@ -68,8 +68,9 @@ func resourceClusterSchema() map[string]*schema.Schema { Optional: true, Computed: true, } - s["aws_attributes"].ConflictsWith = []string{"azure_attributes"} - s["azure_attributes"].ConflictsWith = []string{"aws_attributes"} + s["aws_attributes"].ConflictsWith = []string{"azure_attributes", "gcp_attributes"} + s["azure_attributes"].ConflictsWith = []string{"aws_attributes", "gcp_attributes"} + s["gcp_attributes"].ConflictsWith = []string{"aws_attributes", "azure_attributes"} s["is_pinned"] = &schema.Schema{ Type: schema.TypeBool, Optional: true, @@ -326,6 +327,9 @@ func modifyClusterRequest(clusterModel *Cluster) { if clusterModel.AzureAttributes != nil { clusterModel.AzureAttributes = nil } + if clusterModel.GcpAttributes != nil { + clusterModel.GcpAttributes = nil + } clusterModel.EnableElasticDisk = false clusterModel.NodeTypeID = "" clusterModel.DriverNodeTypeID = "" diff --git a/docs/resources/cluster.md b/docs/resources/cluster.md index 64b5bf3b71..22583859f1 100644 --- a/docs/resources/cluster.md +++ b/docs/resources/cluster.md @@ -196,7 +196,7 @@ cluster_log_conf { There are a few more advanced attributes for S3 log delivery: -* `destination` - S3 destination, e.g., `s3://my-bucket/some-prefix` You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys. +* `destination` - S3 destination, e.g., `s3://my-bucket/some-prefix` You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys. * `region` - (Optional) S3 region, e.g. `us-west-2`. Either `region` or `endpoint` must be set. 
+* `use_preemptible_executors` - (Optional, bool) Whether to use preemptible executors ([GCP documentation](https://cloud.google.com/compute/docs/instances/preemptible))
+ ## docker_image [Databricks Container Services](https://docs.databricks.com/clusters/custom-containers.html) lets you specify a Docker image when you create a cluster. You need to enable Container Services in *Admin Console / Advanced* page in the user interface. By enabling this feature, you acknowledge and agree that your usage of this feature is subject to the [applicable additional terms](http://www.databricks.com/product-specific-terms). From 81fa607671b299596b7fb496ebce961ce50541dd Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Tue, 23 Mar 2021 10:05:38 +0100 Subject: [PATCH 3/4] Azure spot instances in the instance pool --- CHANGELOG.md | 2 +- compute/model.go | 62 ++++++++++++++++++------------- compute/resource_instance_pool.go | 8 ++++ docs/resources/cluster.md | 6 +-- docs/resources/instance_pool.md | 11 ++++++ 5 files changed, 59 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41359a8d8d..a0aedb8b31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## 0.3.2 * Fix incorrect escaping of notebook names ([#566](https://github.com/databrickslabs/terraform-provider-databricks/pull/566)) -* Added support for spot instances on Azure (TODO) +* Added support for spot instances on Azure ([#571](https://github.com/databrickslabs/terraform-provider-databricks/pull/571)) ## 0.3.1 diff --git a/compute/model.go b/compute/model.go index ba431c4a73..b70cb571c2 100644 --- a/compute/model.go +++ b/compute/model.go @@ -24,6 +24,7 @@ const ( AwsAvailabilitySpotWithFallback = "SPOT_WITH_FALLBACK" ) +// https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#--azureavailability const ( // AzureAvailabilitySpot is spot instance type for clusters AzureAvailabilitySpot = "SPOT_AZURE" @@ -133,7 +134,7 @@ type AwsAttributes struct { } // AzureAttributes encapsulates the Azure attributes for Azure based clusters -// TODO: add link to documentation after release +// 
+// InstancePoolAzureAttributes contains Azure attributes for Azure Databricks deployments for instance pools
`json:"instance_pool_name"` + MinIdleInstances int32 `json:"min_idle_instances,omitempty"` + MaxCapacity int32 `json:"max_capacity,omitempty"` + IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"` + AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"` + AzureAttributes *InstancePoolAzureAttributes `json:"azure_attributes,omitempty"` + NodeTypeID string `json:"node_type_id"` + CustomTags map[string]string `json:"custom_tags,omitempty"` + EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"` + DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"` + PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"` } // InstancePoolStats contains the stats on a given pool @@ -416,20 +425,21 @@ type InstancePoolStats struct { // InstancePoolAndStats encapsulates a get response from the GET api for instance pools on Databricks type InstancePoolAndStats struct { - InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"` - InstancePoolName string `json:"instance_pool_name"` - MinIdleInstances int32 `json:"min_idle_instances,omitempty"` - MaxCapacity int32 `json:"max_capacity,omitempty"` - AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"` - NodeTypeID string `json:"node_type_id"` - DefaultTags map[string]string `json:"default_tags,omitempty" tf:"computed"` - CustomTags map[string]string `json:"custom_tags,omitempty"` - IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"` - EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"` - DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"` - PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"` - State string `json:"state,omitempty"` - Stats *InstancePoolStats `json:"stats,omitempty"` + InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"` + InstancePoolName string `json:"instance_pool_name"` + MinIdleInstances int32 
`json:"min_idle_instances,omitempty"` + MaxCapacity int32 `json:"max_capacity,omitempty"` + AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"` + AzureAttributes *InstancePoolAzureAttributes `json:"azure_attributes,omitempty"` + NodeTypeID string `json:"node_type_id"` + DefaultTags map[string]string `json:"default_tags,omitempty" tf:"computed"` + CustomTags map[string]string `json:"custom_tags,omitempty"` + IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"` + EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"` + DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"` + PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"` + State string `json:"state,omitempty"` + Stats *InstancePoolStats `json:"stats,omitempty"` } // InstancePoolList shows list of instance pools diff --git a/compute/resource_instance_pool.go b/compute/resource_instance_pool.go index 1f09ff43c1..382be51631 100644 --- a/compute/resource_instance_pool.go +++ b/compute/resource_instance_pool.go @@ -61,6 +61,8 @@ func ResourceInstancePool() *schema.Resource { s["custom_tags"].ForceNew = true s["enable_elastic_disk"].ForceNew = true s["enable_elastic_disk"].Default = true + s["aws_attributes"].ConflictsWith = []string{"azure_attributes"} + s["azure_attributes"].ConflictsWith = []string{"aws_attributes"} // TODO: check if it's really force new... 
if v, err := common.SchemaPath(s, "aws_attributes", "availability"); err == nil { v.ForceNew = true @@ -71,6 +73,12 @@ func ResourceInstancePool() *schema.Resource { if v, err := common.SchemaPath(s, "aws_attributes", "spot_bid_price_percent"); err == nil { v.ForceNew = true } + if v, err := common.SchemaPath(s, "azure_attributes", "availability"); err == nil { + v.ForceNew = true + } + if v, err := common.SchemaPath(s, "azure_attributes", "spot_bid_max_price"); err == nil { + v.ForceNew = true + } if v, err := common.SchemaPath(s, "disk_spec", "disk_type", "azure_disk_volume_type"); err == nil { v.ForceNew = true // nolint diff --git a/docs/resources/cluster.md b/docs/resources/cluster.md index 22583859f1..607866b7c5 100644 --- a/docs/resources/cluster.md +++ b/docs/resources/cluster.md @@ -269,7 +269,7 @@ The following options are available: ## azure_attributes -`azure_attributes` optional configuration block contains attributes related to [clusters running on Azure](TODO). +`azure_attributes` optional configuration block contains attributes related to [clusters running on Azure](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#--azureattributes). -> **Note** *(Azure only)* Please specify empty configuration block (`azure_attributes {}`), even if you're not setting any custom values. This will prevent any resource update issues. @@ -293,11 +293,11 @@ resource "databricks_cluster" "this" { } ``` -The following options are available: +The following options are [available](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#--azureattributes): * `availability` - (Optional) Availability type used for all subsequent nodes past the `first_on_demand` ones. Valid values are `SPOT_AZURE`, `SPOT_WITH_FALLBACK`, and `ON_DEMAND_AZURE`. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. 
* `first_on_demand` - (Optional) The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. -* `spot_bid_max_price` - (Optional) The max price for Azure spot instances. +* `spot_bid_max_price` - (Optional) The max price for Azure spot instances. Use `-1` to specify lowest price. ## gcp_attributes diff --git a/docs/resources/instance_pool.md b/docs/resources/instance_pool.md index e4b2d5dc3d..b4d4f9198b 100644 --- a/docs/resources/instance_pool.md +++ b/docs/resources/instance_pool.md @@ -56,6 +56,17 @@ The following arguments are required: * `availability` - (Optional) (String) Availability type used for all instances in the pool. Only `ON_DEMAND` and `SPOT` are supported. * `zone_id` - (Required) (String) Identifier for the availability zone/datacenter in which the instance pool resides. This string is of a form like `"us-west-2a"`. The provided availability zone must be in the same region as the Databricks deployment. For example, `"us-west-2a"` is not a valid zone ID if the Databricks deployment resides in the `"us-east-1"` region. This is an optional field. If not specified, a default zone is used. You can find the list of available zones as well as the default value by using the [List Zones API](https://docs.databricks.com/dev-tools/api/latest/clusters.html#clusterclusterservicelistavailablezones). 
+The following options are [available](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/instance-pools#--instancepoolazureattributes):
a/compute/resource_cluster_test.go +++ b/compute/resource_cluster_test.go @@ -1055,7 +1055,7 @@ func TestResourceClusterUpdate_FailNumWorkersZero(t *testing.T) { require.Equal(t, true, strings.Contains(err.Error(), "NumWorkers could be 0 only for SingleNode clusters")) } -func TestModifyClusterRequest(t *testing.T) { +func TestModifyClusterRequestAws(t *testing.T) { c := Cluster{ InstancePoolID: "a", AwsAttributes: &AwsAttributes{ @@ -1072,3 +1072,37 @@ func TestModifyClusterRequest(t *testing.T) { assert.Equal(t, "", c.DriverNodeTypeID) assert.Equal(t, false, c.EnableElasticDisk) } + +func TestModifyClusterRequestAzure(t *testing.T) { + c := Cluster{ + InstancePoolID: "a", + AzureAttributes: &AzureAttributes{ + FirstOnDemand: 1, + }, + EnableElasticDisk: true, + NodeTypeID: "d", + DriverNodeTypeID: "e", + } + modifyClusterRequest(&c) + assert.Nil(t, c.AzureAttributes) + assert.Equal(t, "", c.NodeTypeID) + assert.Equal(t, "", c.DriverNodeTypeID) + assert.Equal(t, false, c.EnableElasticDisk) +} + +func TestModifyClusterRequestGcp(t *testing.T) { + c := Cluster{ + InstancePoolID: "a", + GcpAttributes: &GcpAttributes{ + UsePreemptibleExecutors: true, + }, + EnableElasticDisk: true, + NodeTypeID: "d", + DriverNodeTypeID: "e", + } + modifyClusterRequest(&c) + assert.Equal(t, false, c.GcpAttributes.UsePreemptibleExecutors) + assert.Equal(t, "", c.NodeTypeID) + assert.Equal(t, "", c.DriverNodeTypeID) + assert.Equal(t, false, c.EnableElasticDisk) +}