Merge pull request #1970 from hashicorp/f-asg-wait-for-elb-inservice

provider/aws: ASGs can wait for ELB InService
hashicorp · May 14, 2015 · e479876 · e479876
2 parents a0e0d62 + b56a426
commit e479876
Show file tree

Hide file tree

Showing 3 changed files with 135 additions and 24 deletions.
diff --git a/builtin/providers/aws/resource_aws_autoscaling_group.go b/builtin/providers/aws/resource_aws_autoscaling_group.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/awslabs/aws-sdk-go/aws"
 	"github.com/awslabs/aws-sdk-go/service/autoscaling"
+	"github.com/awslabs/aws-sdk-go/service/elb"
 )
 
 func resourceAwsAutoscalingGroup() *schema.Resource {
@@ -38,6 +39,11 @@ func resourceAwsAutoscalingGroup() *schema.Resource {
 				Computed: true,
 			},
 
+			"min_elb_capacity": &schema.Schema{
+				Type:     schema.TypeInt,
+				Optional: true,
+			},
+
 			"min_size": &schema.Schema{
 				Type:     schema.TypeInt,
 				Required: true,
@@ -386,13 +392,19 @@ var waitForASGCapacityTimeout = 10 * time.Minute
 // Waits for a minimum number of healthy instances to show up as healthy in the
 // ASG before continuing. Waits up to `waitForASGCapacityTimeout` for
 // "desired_capacity", or "min_size" if desired capacity is not specified.
+//
+// If "min_elb_capacity" is specified, will also wait for that number of
+// instances to show up InService in all attached ELBs. See "Waiting for
+// Capacity" in docs for more discussion of the feature.
 func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
-	waitFor := d.Get("min_size").(int)
+	wantASG := d.Get("min_size").(int)
 	if v := d.Get("desired_capacity").(int); v > 0 {
-		waitFor = v
+		wantASG = v
 	}
+	wantELB := d.Get("min_elb_capacity").(int)
+
+	log.Printf("[DEBUG] Wanting for capacity: %d ASG, %d ELB", wantASG, wantELB)
 
-	log.Printf("[DEBUG] Waiting for group to have %d healthy instances", waitFor)
 	return resource.Retry(waitForASGCapacityTimeout, func() error {
 		g, err := getAwsAutoscalingGroup(d, meta)
 		if err != nil {
@@ -401,24 +413,76 @@ func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
 		if g == nil {
 			return nil
 		}
+		lbis, err := getLBInstanceStates(g, meta)
+		if err != nil {
+			return resource.RetryError{Err: err}
+		}
+
+		haveASG := 0
+		haveELB := 0
 
-		healthy := 0
 		for _, i := range g.Instances {
-			if i.HealthStatus == nil {
+			if i.HealthStatus == nil || i.InstanceID == nil || i.LifecycleState == nil {
+				continue
+			}
+
+			if !strings.EqualFold(*i.HealthStatus, "Healthy") {
+				continue
+			}
+
+			if !strings.EqualFold(*i.LifecycleState, "InService") {
 				continue
 			}
-			if strings.EqualFold(*i.HealthStatus, "Healthy") {
-				healthy++
+
+			haveASG++
+
+			if wantELB > 0 {
+				inAllLbs := true
+				for _, states := range lbis {
+					state, ok := states[*i.InstanceID]
+					if !ok || !strings.EqualFold(state, "InService") {
+						inAllLbs = false
+					}
+				}
+				if inAllLbs {
+					haveELB++
+				}
 			}
 		}
 
-		log.Printf(
-			"[DEBUG] %q has %d/%d healthy instances", d.Id(), healthy, waitFor)
+		log.Printf("[DEBUG] %q Capacity: %d/%d ASG, %d/%d ELB",
+			d.Id(), haveASG, wantASG, haveELB, wantELB)
 
-		if healthy >= waitFor {
+		if haveASG >= wantASG && haveELB >= wantELB {
 			return nil
 		}
 
-		return fmt.Errorf("Waiting for healthy instances: %d/%d", healthy, waitFor)
+		return fmt.Errorf("Still need to wait for more healthy instances.")
 	})
 }
+
+// Returns a mapping of the instance states of all the ELBs attached to the
+// provided ASG.
+//
+// Nested like: lbName -> instanceId -> instanceState
+func getLBInstanceStates(g *autoscaling.AutoScalingGroup, meta interface{}) (map[string]map[string]string, error) {
+	lbInstanceStates := make(map[string]map[string]string)
+	elbconn := meta.(*AWSClient).elbconn
+
+	for _, lbName := range g.LoadBalancerNames {
+		lbInstanceStates[*lbName] = make(map[string]string)
+		opts := &elb.DescribeInstanceHealthInput{LoadBalancerName: lbName}
+		r, err := elbconn.DescribeInstanceHealth(opts)
+		if err != nil {
+			return nil, err
+		}
+		for _, is := range r.InstanceStates {
+			if is.InstanceID == nil || is.State == nil {
+				continue
+			}
+			lbInstanceStates[*lbName][*is.InstanceID] = *is.State
+		}
+	}
+
+	return lbInstanceStates, nil
+}
diff --git a/builtin/providers/aws/resource_aws_autoscaling_group_test.go b/builtin/providers/aws/resource_aws_autoscaling_group_test.go
@@ -347,26 +347,36 @@ resource "aws_elb" "bar" {
   availability_zones = ["us-west-2a"]
 
   listener {
-    instance_port = 8000
+    instance_port = 80
     instance_protocol = "http"
     lb_port = 80
     lb_protocol = "http"
   }
+
+  health_check {
+    healthy_threshold = 2
+    unhealthy_threshold = 2
+    target = "HTTP:80/"
+    interval = 5
+    timeout = 2
+  }
 }
 
 resource "aws_launch_configuration" "foobar" {
-  image_id = "ami-21f78e11"
-  instance_type = "t1.micro"
+  // need an AMI that listens on :80 at boot, this is:
+  // bitnami-nginxstack-1.6.1-0-linux-ubuntu-14.04.1-x86_64-hvm-ebs-ami-99f5b1a9-3
+  image_id = "ami-b5b3fc85"
+  instance_type = "t2.micro"
 }
 
 resource "aws_autoscaling_group" "bar" {
   availability_zones = ["us-west-2a"]
   name = "foobar3-terraform-test"
-  max_size = 5
+  max_size = 2
   min_size = 2
   health_check_grace_period = 300
   health_check_type = "ELB"
-  desired_capacity = 4
+  min_elb_capacity = 1
   force_delete = true
 
   launch_configuration = "${aws_launch_configuration.foobar.name}"

diff --git a/website/source/docs/providers/aws/r/autoscale.html.markdown b/website/source/docs/providers/aws/r/autoscale.html.markdown
@@ -43,19 +43,18 @@ The following arguments are supported:
 
 * `name` - (Required) The name of the auto scale group.
 * `max_size` - (Required) The maximum size of the auto scale group.
-* `min_size` - (Required) The minimum size of the auto scale group. Terraform
-  waits after ASG creation for this number of healthy instances to show up in
-  the ASG before continuing. Currently, it will wait for a maxiumum of 10m, if
-  ASG creation is taking more than a few minutes, it's worth investigating for
-  scaling actvity errors caused by problems with the selected Launch
-  Configuration.
+* `min_size` - (Required) The minimum size of the auto scale group.
+    (See also [Waiting for Capacity](#waiting-for-capacity) below.)
 * `availability_zones` - (Required) A list of AZs to launch resources in.
 * `launch_configuration` - (Required) The ID of the launch configuration to use.
 * `health_check_grace_period` - (Optional) Time after instance comes into service before checking health.
 * `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done.
 * `desired_capacity` - (Optional) The number of Amazon EC2 instances that
-  should be running in the group. (If this is specified, Terraform will wait for
-  this number of healthy instances after ASG creation instead of `min_size`.)
+    should be running in the group. (See also [Waiting for
+    Capacity](#waiting-for-capacity) below.)
+* `min_elb_capacity` - (Optional) Setting this will cause Terraform to wait
+    for this number of healthy instances in all attached load balancers.
+    (See also [Waiting for Capacity](#waiting-for-capacity) below.)
 * `force_delete` - (Optional) Allows deleting the autoscaling group without waiting
    for all instances in the pool to terminate.
 * `load_balancers` (Optional) A list of load balancer names to add to the autoscaling
@@ -88,3 +87,41 @@ The following attributes are exported:
 * `vpc_zone_identifier` - The VPC zone identifier
 * `load_balancers` (Optional) The load balancer names associated with the
    autoscaling group.
+
+<a id="waiting-for-capacity"></a>
+## Waiting for Capacity
+
+A newly-created ASG is initially empty and begins to scale to `min_size` (or
+`desired_capacity`, if specified) by launching instances using the provided
+Launch Configuration. These instances take time to launch and boot.
+
+Terraform provides two mechanisms to help consistently manage ASG scale up
+time across dependent resources.
+
+#### Waiting for ASG Capacity
+
+The first is default behavior. Terraform waits after ASG creation for
+`min_size` (or `desired_capacity`, if specified) healthy instances to show up
+in the ASG before continuing.
+
+Terraform considers an instance "healthy" when the ASG reports `HealthStatus:
+"Healthy"` and `LifecycleState: "InService"`. See the [AWS AutoScaling
+Docs](https://docs.aws.amazon.com/AutoScaling/latest/DeveloperGuide/AutoScalingGroupLifecycle.html)
+for more information on an ASG's lifecycle.
+
+Terraform will wait for healthy instances for up to 10 minutes. If ASG creation
+is taking more than a few minutes, it's worth investigating for scaling activity
+errors, which can be caused by problems with the selected Launch Configuration.
+
+#### Waiting for ELB Capacity
+
+The second mechanism is optional, and affects ASGs with attached Load
+Balancers. If `min_elb_capacity` is set, Terraform will wait for that number of
+Instances to be `"InService"` in all attached `load_balancers`. This can be
+used to ensure that service is being provided before Terraform moves on.
+
+As with ASG Capacity, Terraform will wait for up to 10 minutes for
+`"InService"` instances. If ASG creation takes more than a few minutes, this
+could indicate one of a number of configuration problems. See the [AWS Docs on
+Load Balancer Troubleshooting](https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-troubleshooting.html)
+for more information.