-
Notifications
You must be signed in to change notification settings - Fork 9.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spot instance support #2095
Spot instance support #2095
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ import ( | |
"encoding/hex" | ||
"fmt" | ||
"log" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
|
@@ -68,6 +69,16 @@ func resourceAwsInstance() *schema.Resource { | |
Computed: true, | ||
}, | ||
|
||
"spot_persist": &schema.Schema{ | ||
Type: schema.TypeBool, | ||
Optional: true, | ||
}, | ||
|
||
"spot_price": &schema.Schema{ | ||
Type: schema.TypeString, | ||
Optional: true, | ||
}, | ||
|
||
"subnet_id": &schema.Schema{ | ||
Type: schema.TypeString, | ||
Optional: true, | ||
|
@@ -121,6 +132,16 @@ func resourceAwsInstance() *schema.Resource { | |
}, | ||
}, | ||
|
||
"spot_request_id": &schema.Schema{ | ||
Type: schema.TypeString, | ||
Computed: true, | ||
}, | ||
|
||
"spot_request_status": &schema.Schema{ | ||
Type: schema.TypeString, | ||
Computed: true, | ||
}, | ||
|
||
"public_dns": &schema.Schema{ | ||
Type: schema.TypeString, | ||
Computed: true, | ||
|
@@ -514,44 +535,147 @@ func resourceAwsInstanceCreate(d *schema.ResourceData, meta interface{}) error { | |
runOpts.BlockDeviceMappings = blockDevices | ||
} | ||
|
||
// Create the instance | ||
log.Printf("[DEBUG] Run configuration: %#v", runOpts) | ||
var err error | ||
|
||
var runResp *ec2.Reservation | ||
for i := 0; i < 5; i++ { | ||
runResp, err = conn.RunInstances(runOpts) | ||
if awsErr, ok := err.(awserr.Error); ok { | ||
// IAM profiles can take ~10 seconds to propagate in AWS: | ||
// http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#launch-instance-with-role-console | ||
if awsErr.Code() == "InvalidParameterValue" && strings.Contains(awsErr.Message(), "Invalid IAM Instance Profile") { | ||
log.Printf("[DEBUG] Invalid IAM Instance Profile referenced, retrying...") | ||
time.Sleep(2 * time.Second) | ||
continue | ||
instanceID := "" | ||
onDemand := true | ||
|
||
if sp := d.Get("spot_price").(string); sp != "" { | ||
|
||
// FIXME: Write tests for invalid instance type assert rejected | ||
instance_type := d.Get("instance_type").(string) | ||
if instance_type == "t2.micro" || | ||
instance_type == "t2.small" || | ||
instance_type == "t2.medium" { | ||
return fmt.Errorf("Instance type (%s) cannot be used for a spot instance request", instance_type) | ||
} | ||
|
||
// FIXME: Write tests for negative bid assert rejected | ||
// FIXME: Write tests for non decimal bid assert rejected | ||
bid, err := strconv.ParseFloat(sp, 32) | ||
if err == nil { | ||
if bid < 0 { | ||
return fmt.Errorf("Spot bid (%s) must be positive", sp) | ||
} | ||
} else { | ||
return fmt.Errorf("Spot bid (%s) is not a valid decimal", sp) | ||
} | ||
|
||
onDemand = false | ||
|
||
spotPlacement := &ec2.SpotPlacement{ | ||
AvailabilityZone: aws.String(d.Get("availability_zone").(string)), | ||
GroupName: aws.String(d.Get("placement_group").(string)), | ||
} | ||
|
||
launchSpec := &ec2.RequestSpotLaunchSpecification{ | ||
Placement: spotPlacement, | ||
BlockDeviceMappings: runOpts.BlockDeviceMappings, | ||
ImageID: runOpts.ImageID, | ||
InstanceType: runOpts.InstanceType, | ||
UserData: runOpts.UserData, | ||
EBSOptimized: runOpts.EBSOptimized, | ||
IAMInstanceProfile: runOpts.IAMInstanceProfile, | ||
NetworkInterfaces: runOpts.NetworkInterfaces, | ||
} | ||
|
||
if runOpts.SubnetID != nil { | ||
launchSpec.SubnetID = runOpts.SubnetID | ||
} | ||
|
||
if runOpts.SecurityGroupIDs != nil { | ||
launchSpec.SecurityGroupIDs = runOpts.SecurityGroupIDs | ||
} | ||
|
||
if runOpts.SecurityGroups != nil { | ||
launchSpec.SecurityGroups = runOpts.SecurityGroups | ||
} | ||
|
||
if runOpts.KeyName != nil { | ||
launchSpec.KeyName = runOpts.KeyName | ||
} | ||
|
||
spotType := "one-time" | ||
|
||
if d.Get("spot_persist").(bool) { | ||
spotType = "persistent" | ||
} | ||
// Build the spot instance struct | ||
spotOpts := &ec2.RequestSpotInstancesInput{ | ||
SpotPrice: aws.String(sp), | ||
Type: aws.String(spotType), | ||
InstanceCount: aws.Long(1), | ||
LaunchSpecification: launchSpec, | ||
} | ||
|
||
// Make the spot instance request | ||
var spotResp *ec2.RequestSpotInstancesOutput | ||
spotResp, err = conn.RequestSpotInstances(spotOpts) | ||
|
||
request_id := *spotResp.SpotInstanceRequests[0].SpotInstanceRequestID | ||
d.Set("spot_request_id", request_id) | ||
|
||
spotStateConf := &resource.StateChangeConf{ | ||
// http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-bid-status.html | ||
Pending: []string{"start", "pending-evaluation", "pending-fulfillment"}, | ||
Target: "fulfilled", | ||
Refresh: SpotInstanceStateRefreshFunc(conn, request_id), | ||
Timeout: 10 * time.Minute, | ||
Delay: 10 * time.Second, | ||
MinTimeout: 3 * time.Second, | ||
} | ||
|
||
log.Printf("[DEBUG] waiting for spot bid to resolve... this may take several minutes.") | ||
spotRequestRaw, err := spotStateConf.WaitForState() | ||
spotRequest := spotRequestRaw.(*ec2.SpotInstanceRequest) | ||
|
||
if err != nil { | ||
return fmt.Errorf("Error while waiting for spot request (%s) to resolve: %s", request_id, err) | ||
} else { | ||
instanceID = *spotRequest.InstanceID | ||
} | ||
break | ||
} | ||
if err != nil { | ||
return fmt.Errorf("Error launching source instance: %s", err) | ||
} | ||
|
||
instance := runResp.Instances[0] | ||
log.Printf("[INFO] Instance ID: %s", *instance.InstanceID) | ||
if onDemand { | ||
// Create the instance | ||
log.Printf("[DEBUG] Run configuration: %#v", runOpts) | ||
var err error | ||
|
||
var runResp *ec2.Reservation | ||
for i := 0; i < 5; i++ { | ||
runResp, err = conn.RunInstances(runOpts) | ||
if awsErr, ok := err.(awserr.Error); ok { | ||
// IAM profiles can take ~10 seconds to propagate in AWS: | ||
// http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#launch-instance-with-role-console | ||
if awsErr.Code() == "InvalidParameterValue" && strings.Contains(awsErr.Message(), "Invalid IAM Instance Profile") { | ||
log.Printf("[DEBUG] Invalid IAM Instance Profile referenced, retrying...") | ||
time.Sleep(2 * time.Second) | ||
continue | ||
} | ||
} | ||
break | ||
} | ||
if err != nil { | ||
return fmt.Errorf("Error launching source instance: %s", err) | ||
} | ||
|
||
// Store the resulting ID so we can look this up later | ||
d.SetId(*instance.InstanceID) | ||
instance := runResp.Instances[0] | ||
log.Printf("[INFO] Instance ID: %s", *instance.InstanceID) | ||
|
||
// Wait for the instance to become running so we can get some attributes | ||
// that aren't available until later. | ||
log.Printf( | ||
"[DEBUG] Waiting for instance (%s) to become running", | ||
*instance.InstanceID) | ||
// Wait for the instance to become running so we can get some attributes | ||
// that aren't available until later. | ||
log.Printf( | ||
"[DEBUG] Waiting for instance (%s) to become running", | ||
*instance.InstanceID) | ||
|
||
instanceID = *instance.InstanceID | ||
} | ||
|
||
// Store the resulting ID so we can look this up later | ||
d.SetId(instanceID) | ||
|
||
stateConf := &resource.StateChangeConf{ | ||
Pending: []string{"pending"}, | ||
Target: "running", | ||
Refresh: InstanceStateRefreshFunc(conn, *instance.InstanceID), | ||
Refresh: InstanceStateRefreshFunc(conn, instanceID), | ||
Timeout: 10 * time.Minute, | ||
Delay: 10 * time.Second, | ||
MinTimeout: 3 * time.Second, | ||
|
@@ -561,10 +685,10 @@ func resourceAwsInstanceCreate(d *schema.ResourceData, meta interface{}) error { | |
if err != nil { | ||
return fmt.Errorf( | ||
"Error waiting for instance (%s) to become ready: %s", | ||
*instance.InstanceID, err) | ||
instanceID, err) | ||
} | ||
|
||
instance = instanceRaw.(*ec2.Instance) | ||
instance := instanceRaw.(*ec2.Instance) | ||
|
||
// Initialize the connection info | ||
if instance.PublicIPAddress != nil { | ||
|
@@ -591,6 +715,25 @@ func resourceAwsInstanceCreate(d *schema.ResourceData, meta interface{}) error { | |
func resourceAwsInstanceRead(d *schema.ResourceData, meta interface{}) error { | ||
conn := meta.(*AWSClient).ec2conn | ||
|
||
if reqID := d.Get("spot_request_id").(string); reqID != "" { | ||
resp, err := conn.DescribeSpotInstanceRequests(&ec2.DescribeSpotInstanceRequestsInput{ | ||
SpotInstanceRequestIDs: []*string{aws.String(reqID)}, | ||
}) | ||
|
||
if err != nil { | ||
// If the instance was not found, return nil so that we can show | ||
// that the instance is gone. | ||
if ec2err, ok := err.(awserr.Error); ok && ec2err.Code() == "InvalidSpotInstanceRequestID.NotFound" { | ||
d.Set("spot_request_id", "") | ||
return nil | ||
} | ||
|
||
// Some other error, report it | ||
return err | ||
} | ||
d.Set("spot_request_status", resp.SpotInstanceRequests[0].Status.Code) | ||
} | ||
|
||
resp, err := conn.DescribeInstances(&ec2.DescribeInstancesInput{ | ||
InstanceIDs: []*string{aws.String(d.Id())}, | ||
}) | ||
|
@@ -749,13 +892,36 @@ func resourceAwsInstanceUpdate(d *schema.ResourceData, meta interface{}) error { | |
return resourceAwsInstanceRead(d, meta) | ||
} | ||
|
||
// FIXME add test to ensure spot instances are deleted | ||
func resourceAwsInstanceDelete(d *schema.ResourceData, meta interface{}) error { | ||
conn := meta.(*AWSClient).ec2conn | ||
|
||
if reqID := d.Get("spot_request_id").(string); reqID != "" { | ||
log.Printf("[INFO] Cancelling spot request: %s", reqID) | ||
output, err := conn.CancelSpotInstanceRequests(&ec2.CancelSpotInstanceRequestsInput{ | ||
SpotInstanceRequestIDs: []*string{aws.String(reqID)}, | ||
}) | ||
|
||
if err != nil { | ||
return fmt.Errorf( | ||
"Error cancelling spot reservation (%s): %s", | ||
reqID, err) | ||
} | ||
|
||
if output != nil { | ||
cancelled := output.CancelledSpotInstanceRequests | ||
if len(cancelled) != 1 || *cancelled[0].SpotInstanceRequestID != reqID { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little wary of this logic myself. It might cause the stack to fail to delete if the spot instance request wasn't fulfilled correctly (I'm not sure how cancel responds to requests that are already cancelled / in some other bad state) One option I've considered is to check if the spot instance is actually active, and if it's not, just skip trying to cancel altogether. That seems cleanest / safest to me. What do you guys think we should do here? |
||
return fmt.Errorf("Error cancelling reservation: %s %s", reqID, len(cancelled)) | ||
} | ||
} | ||
d.Set("spot_request_id", "") | ||
} | ||
|
||
log.Printf("[INFO] Terminating instance: %s", d.Id()) | ||
req := &ec2.TerminateInstancesInput{ | ||
InstanceIDs: []*string{aws.String(d.Id())}, | ||
} | ||
|
||
if _, err := conn.TerminateInstances(req); err != nil { | ||
return fmt.Errorf("Error terminating instance: %s", err) | ||
} | ||
|
@@ -784,6 +950,37 @@ func resourceAwsInstanceDelete(d *schema.ResourceData, meta interface{}) error { | |
return nil | ||
} | ||
|
||
// SpotInstanceStateRefreshFunc returns a resource.StateRefreshFunc that is used to watch | ||
// an EC2 spot instance request | ||
func SpotInstanceStateRefreshFunc(conn *ec2.EC2, reqID string) resource.StateRefreshFunc { | ||
|
||
return func() (interface{}, string, error) { | ||
resp, err := conn.DescribeSpotInstanceRequests(&ec2.DescribeSpotInstanceRequestsInput{ | ||
SpotInstanceRequestIDs: []*string{aws.String(reqID)}, | ||
}) | ||
|
||
if err != nil { | ||
// If the instance was not found, return nil so that we can show | ||
// that the instance is gone. | ||
if ec2err, ok := err.(awserr.Error); ok && ec2err.Code() == "InvalidSpotInstanceRequestID.NotFound" { | ||
resp = nil | ||
} else { | ||
log.Printf("Error on InstanceStateRefresh: %s", err) | ||
return nil, "", err | ||
} | ||
} | ||
|
||
if resp == nil || len(resp.SpotInstanceRequests) == 0 { | ||
// Sometimes AWS just has consistency issues and doesn't see | ||
// our instance yet. Return an empty state. | ||
return nil, "", nil | ||
} | ||
|
||
req := resp.SpotInstanceRequests[0] | ||
return req, *req.Status.Code, nil | ||
} | ||
} | ||
|
||
// InstanceStateRefreshFunc returns a resource.StateRefreshFunc that is used to watch | ||
// an EC2 instance. | ||
func InstanceStateRefreshFunc(conn *ec2.EC2, instanceID string) resource.StateRefreshFunc { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the script may wait 10 minutes for a bid to complete. You can't run
terraform apply
in parallel so In the mean time, you will be unable to apply other changes to your infrastructure. How likely is it that you'll wait the full 10 minutes? Can this be shorter (e.g. 10-30s)? Or configurable?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This all comes down to how fast amazon resolves your bid. Typically it's about 5 minutes or less, but I've seen it take as long as half an hour in rare cases.
Making it configurable wouldn't hurt - to be honest though this is just copy pasta. If someone decides they want a spot instance, they should be aware that it will take some time for their bid to resolve
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I actually need to wait for the bid to resolve? Or can I just post it and move on, and assume my instances may spin up later? When I read the docs on spot I got the impression that you can post a bid and walk away and AWS will boot the instances for you later if the price drops. Maybe that feature is optional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cbednarski that's an interesting proposition.
If we don't wait for the bid to resolve, there's no guarantee that the resource will ever actually be provided, which kind of sucks. On the other hand, it would be nice to be able to put the bid in proceed irregardless of if the bid ever resolves.
What do you think about making this another switch? Something like "spot_wait" to toggle if you should wait for the instance to come up or not. I can see use cases for both.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is more Terraform-like to just wait for the bid for now. Terraform waits for things to be fully ready at a resource level. A spot instance isn't ready until it exists.
Let's get this merged and then we might want to entertain adding another flag to the resource that just tells Terraform to just wait for the bid and move on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, I'm working moving spot bids into their own resource right now - i'll make sure that instances wait for the bid to resolve (i had made this a flag)