Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for spot instances on Azure #571

Merged
merged 4 commits into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 0.3.2

* Fix incorrect escaping of notebook names ([#566](https://github.com/databrickslabs/terraform-provider-databricks/pull/566))
* Added support for spot instances on Azure ([#571](https://github.com/databrickslabs/terraform-provider-databricks/pull/571))

## 0.3.1

Expand Down
129 changes: 84 additions & 45 deletions compute/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ type AutoScale struct {
MaxWorkers int32 `json:"max_workers,omitempty"`
}

// AwsAvailability is a type for describing AWS availability on cluster nodes
type AwsAvailability string
// Availability describes the availability type (for example spot or on-demand)
// of the instances backing cluster nodes. It is shared by the AWS and Azure
// attribute structs.
type Availability string

const (
// AwsAvailabilitySpot is spot instance type for clusters
Expand All @@ -24,6 +24,17 @@ const (
AwsAvailabilitySpotWithFallback = "SPOT_WITH_FALLBACK"
)

// Availability values for Azure based clusters, see
// https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#--azureavailability
//
// NOTE(review): these are untyped string constants rather than values of type
// Availability; they are still assignable to Availability fields.
const (
// AzureAvailabilitySpot is spot instance type for clusters
AzureAvailabilitySpot = "SPOT_AZURE"
// AzureAvailabilityOnDemand is OnDemand instance type for clusters
AzureAvailabilityOnDemand = "ON_DEMAND_AZURE"
// AzureAvailabilitySpotWithFallback is Spot instance type for clusters with option
// to fall back to on-demand if instance cannot be acquired
AzureAvailabilitySpotWithFallback = "SPOT_WITH_FALLBACK_AZURE"
)

// AzureDiskVolumeType is the disk volume type on Azure VMs
type AzureDiskVolumeType string

Expand Down Expand Up @@ -112,14 +123,29 @@ type ZonesInfo struct {
// AwsAttributes encapsulates the aws attributes for aws based clusters
// https://docs.databricks.com/dev-tools/api/latest/clusters.html#clusterclusterattributes
type AwsAttributes struct {
FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"`
Availability AwsAvailability `json:"availability,omitempty" tf:"computed"`
ZoneID string `json:"zone_id,omitempty" tf:"computed"`
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty" tf:"computed"`
EbsVolumeType EbsVolumeType `json:"ebs_volume_type,omitempty" tf:"computed"`
EbsVolumeCount int32 `json:"ebs_volume_count,omitempty" tf:"computed"`
EbsVolumeSize int32 `json:"ebs_volume_size,omitempty" tf:"computed"`
FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"`
Availability Availability `json:"availability,omitempty" tf:"computed"`
ZoneID string `json:"zone_id,omitempty" tf:"computed"`
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty" tf:"computed"`
EbsVolumeType EbsVolumeType `json:"ebs_volume_type,omitempty" tf:"computed"`
EbsVolumeCount int32 `json:"ebs_volume_count,omitempty" tf:"computed"`
EbsVolumeSize int32 `json:"ebs_volume_size,omitempty" tf:"computed"`
}

// AzureAttributes encapsulates the Azure attributes for Azure based clusters
// https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#clusterazureattributes
//
// All fields are tagged omitempty, so zero values are not serialized to JSON.
type AzureAttributes struct {
// FirstOnDemand is sent as "first_on_demand"; presumably the number of leading
// nodes placed on on-demand instances - confirm against the linked API docs.
FirstOnDemand int32 `json:"first_on_demand,omitempty" tf:"computed"`
// Availability is sent as "availability"; expected to hold one of the
// AzureAvailability* values.
Availability Availability `json:"availability,omitempty" tf:"computed"`
// SpotBidMaxPrice is sent as "spot_bid_max_price"; presumably the maximum bid
// price for spot instances - see the linked API docs for units and special values.
SpotBidMaxPrice float64 `json:"spot_bid_max_price,omitempty" tf:"computed"`
}

// GcpAttributes encapsulates GCP specific attributes
// https://docs.gcp.databricks.com/dev-tools/api/latest/clusters.html#clustergcpattributes
//
// Both fields are tagged omitempty, so zero values are not serialized to JSON.
type GcpAttributes struct {
// UsePreemptibleExecutors is sent as "use_preemptible_executors"; presumably
// toggles preemptible VMs for executors - confirm against the linked API docs.
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty" tf:"computed"`
// GoogleServiceAccount is sent as "google_service_account".
GoogleServiceAccount string `json:"google_service_account,omitempty" tf:"computed"`
}

// DbfsStorageInfo contains the destination string for DBFS
Expand Down Expand Up @@ -233,12 +259,14 @@ type Cluster struct {
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty" tf:"computed"`
EnableLocalDiskEncryption bool `json:"enable_local_disk_encryption,omitempty"`

NodeTypeID string `json:"node_type_id,omitempty" tf:"group:node_type,computed"`
DriverNodeTypeID string `json:"driver_node_type_id,omitempty" tf:"conflicts:instance_pool_id,computed"`
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"group:node_type"`
PolicyID string `json:"policy_id,omitempty"`
AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty" tf:"conflicts:instance_pool_id"`
AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"`
NodeTypeID string `json:"node_type_id,omitempty" tf:"group:node_type,computed"`
DriverNodeTypeID string `json:"driver_node_type_id,omitempty" tf:"conflicts:instance_pool_id,computed"`
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"group:node_type"`
PolicyID string `json:"policy_id,omitempty"`
AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty" tf:"conflicts:instance_pool_id"`
AzureAttributes *AzureAttributes `json:"azure_attributes,omitempty" tf:"conflicts:instance_pool_id"`
GcpAttributes *GcpAttributes `json:"gcp_attributes,omitempty" tf:"conflicts:instance_pool_id"`
AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"`

SparkConf map[string]string `json:"spark_conf,omitempty"`
SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"`
Expand Down Expand Up @@ -272,6 +300,8 @@ type ClusterInfo struct {
SparkVersion string `json:"spark_version"`
SparkConf map[string]string `json:"spark_conf,omitempty"`
AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty"`
AzureAttributes *AzureAttributes `json:"azure_attributes,omitempty"`
GcpAttributes *GcpAttributes `json:"gcp_attributes,omitempty"`
NodeTypeID string `json:"node_type_id,omitempty"`
DriverNodeTypeID string `json:"driver_node_type_id,omitempty"`
SSHPublicKeys []string `json:"ssh_public_keys,omitempty"`
Expand All @@ -285,7 +315,7 @@ type ClusterInfo struct {
InstancePoolID string `json:"instance_pool_id,omitempty"`
PolicyID string `json:"policy_id,omitempty"`
SingleUserName string `json:"single_user_name,omitempty"`
ClusterSource AwsAvailability `json:"cluster_source,omitempty"`
ClusterSource Availability `json:"cluster_source,omitempty"`
DockerImage *DockerImage `json:"docker_image,omitempty"`
State ClusterState `json:"state"`
StateMessage string `json:"state_message,omitempty"`
Expand Down Expand Up @@ -344,9 +374,16 @@ type Command struct {

// InstancePoolAwsAttributes contains aws attributes for AWS Databricks deployments for instance pools
type InstancePoolAwsAttributes struct {
Availability AwsAvailability `json:"availability,omitempty"`
ZoneID string `json:"zone_id"`
SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty"`
Availability Availability `json:"availability,omitempty"`
ZoneID string `json:"zone_id"`
SpotBidPricePercent int32 `json:"spot_bid_price_percent,omitempty"`
}

// InstancePoolAzureAttributes contains Azure attributes for Azure Databricks deployments for instance pools
// https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/instance-pools#clusterinstancepoolazureattributes
type InstancePoolAzureAttributes struct {
// Availability is sent as "availability"; expected to hold one of the
// AzureAvailability* values.
Availability Availability `json:"availability,omitempty"`
// SpotBidMaxPrice is sent as "spot_bid_max_price"; presumably the maximum bid
// price for spot instances - see the linked API docs for units and special values.
SpotBidMaxPrice float64 `json:"spot_bid_max_price,omitempty" tf:"computed"`
}

// InstancePoolDiskType contains disk type information for each of the different cloud service providers
Expand All @@ -364,17 +401,18 @@ type InstancePoolDiskSpec struct {

// InstancePool describes the instance pool object on Databricks
type InstancePool struct {
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"`
InstancePoolName string `json:"instance_pool_name"`
MinIdleInstances int32 `json:"min_idle_instances,omitempty"`
MaxCapacity int32 `json:"max_capacity,omitempty"`
IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"`
AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"`
NodeTypeID string `json:"node_type_id"`
CustomTags map[string]string `json:"custom_tags,omitempty"`
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"`
PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"`
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"`
InstancePoolName string `json:"instance_pool_name"`
MinIdleInstances int32 `json:"min_idle_instances,omitempty"`
MaxCapacity int32 `json:"max_capacity,omitempty"`
IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"`
AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"`
AzureAttributes *InstancePoolAzureAttributes `json:"azure_attributes,omitempty"`
NodeTypeID string `json:"node_type_id"`
CustomTags map[string]string `json:"custom_tags,omitempty"`
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"`
PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"`
}

// InstancePoolStats contains the stats on a given pool
Expand All @@ -387,20 +425,21 @@ type InstancePoolStats struct {

// InstancePoolAndStats encapsulates a get response from the GET api for instance pools on Databricks
type InstancePoolAndStats struct {
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"`
InstancePoolName string `json:"instance_pool_name"`
MinIdleInstances int32 `json:"min_idle_instances,omitempty"`
MaxCapacity int32 `json:"max_capacity,omitempty"`
AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"`
NodeTypeID string `json:"node_type_id"`
DefaultTags map[string]string `json:"default_tags,omitempty" tf:"computed"`
CustomTags map[string]string `json:"custom_tags,omitempty"`
IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"`
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"`
PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"`
State string `json:"state,omitempty"`
Stats *InstancePoolStats `json:"stats,omitempty"`
InstancePoolID string `json:"instance_pool_id,omitempty" tf:"computed"`
InstancePoolName string `json:"instance_pool_name"`
MinIdleInstances int32 `json:"min_idle_instances,omitempty"`
MaxCapacity int32 `json:"max_capacity,omitempty"`
AwsAttributes *InstancePoolAwsAttributes `json:"aws_attributes,omitempty"`
AzureAttributes *InstancePoolAzureAttributes `json:"azure_attributes,omitempty"`
NodeTypeID string `json:"node_type_id"`
DefaultTags map[string]string `json:"default_tags,omitempty" tf:"computed"`
CustomTags map[string]string `json:"custom_tags,omitempty"`
IdleInstanceAutoTerminationMinutes int32 `json:"idle_instance_autotermination_minutes"`
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
DiskSpec *InstancePoolDiskSpec `json:"disk_spec,omitempty"`
PreloadedSparkVersions []string `json:"preloaded_spark_versions,omitempty"`
State string `json:"state,omitempty"`
Stats *InstancePoolStats `json:"stats,omitempty"`
}

// InstancePoolList shows list of instance pools
Expand Down
9 changes: 9 additions & 0 deletions compute/resource_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ func resourceClusterSchema() map[string]*schema.Schema {
Optional: true,
Computed: true,
}
s["aws_attributes"].ConflictsWith = []string{"azure_attributes", "gcp_attributes"}
s["azure_attributes"].ConflictsWith = []string{"aws_attributes", "gcp_attributes"}
s["gcp_attributes"].ConflictsWith = []string{"aws_attributes", "azure_attributes"}
s["is_pinned"] = &schema.Schema{
Type: schema.TypeBool,
Optional: true,
Expand Down Expand Up @@ -321,6 +324,12 @@ func modifyClusterRequest(clusterModel *Cluster) {
}
clusterModel.AwsAttributes = &awsAttributes
}
if clusterModel.AzureAttributes != nil {
clusterModel.AzureAttributes = nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we add a unit test here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But does it make sense to nil the field if it's not nil?..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All Azure attributes are currently related to spot instances only, and if we're using instance pools, then we shouldn't override instance types?

}
if clusterModel.GcpAttributes != nil {
clusterModel.GcpAttributes = nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we add a unit test here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me add it...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

}
clusterModel.EnableElasticDisk = false
clusterModel.NodeTypeID = ""
clusterModel.DriverNodeTypeID = ""
Expand Down
8 changes: 8 additions & 0 deletions compute/resource_instance_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ func ResourceInstancePool() *schema.Resource {
s["custom_tags"].ForceNew = true
s["enable_elastic_disk"].ForceNew = true
s["enable_elastic_disk"].Default = true
s["aws_attributes"].ConflictsWith = []string{"azure_attributes"}
s["azure_attributes"].ConflictsWith = []string{"aws_attributes"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no gcp attributes for instance pools?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no instance pool support yet, so I can't test it - will add later.

// TODO: check if it's really force new...
if v, err := common.SchemaPath(s, "aws_attributes", "availability"); err == nil {
v.ForceNew = true
Expand All @@ -71,6 +73,12 @@ func ResourceInstancePool() *schema.Resource {
if v, err := common.SchemaPath(s, "aws_attributes", "spot_bid_price_percent"); err == nil {
v.ForceNew = true
}
if v, err := common.SchemaPath(s, "azure_attributes", "availability"); err == nil {
v.ForceNew = true
}
if v, err := common.SchemaPath(s, "azure_attributes", "spot_bid_max_price"); err == nil {
v.ForceNew = true
}
if v, err := common.SchemaPath(s, "disk_spec", "disk_type", "azure_disk_volume_type"); err == nil {
v.ForceNew = true
// nolint
Expand Down
Loading