Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ECS stability improvements #4326

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions builtin/providers/aws/resource_aws_ecs_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,39 @@ func resourceAwsEcsClusterDelete(d *schema.ResourceData, meta interface{}) error

log.Printf("[DEBUG] Deleting ECS cluster %s", d.Id())

clusterName := d.Get("name").(string)

servicesOut, servicesErr := conn.ListServices(&ecs.ListServicesInput{
Cluster: aws.String(clusterName),
})
if servicesErr != nil {
return servicesErr
}

for _, serviceArn := range servicesOut.ServiceArns {
updateInput := ecs.UpdateServiceInput{
Service: aws.String(*serviceArn),
Cluster: aws.String(clusterName),
DesiredCount: aws.Int64(int64(0)),
}
_, updateErr := conn.UpdateService(&updateInput)
if updateErr != nil {
return updateErr
}
log.Printf("[DEBUG] Set DesiredCount to 0 for service %s", *serviceArn)

deleteInput := ecs.DeleteServiceInput{
Service: aws.String(*serviceArn),
Cluster: aws.String(clusterName),
}

_, deleteErr := conn.DeleteService(&deleteInput)
if deleteErr != nil {
return deleteErr
}
log.Printf("[DEBUG] Delete found service %s", *serviceArn)
}

return resource.Retry(10*time.Minute, func() error {
out, err := conn.DeleteCluster(&ecs.DeleteClusterInput{
Cluster: aws.String(d.Id()),
Expand Down
63 changes: 46 additions & 17 deletions builtin/providers/aws/resource_aws_ecs_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,21 +241,18 @@ func resourceAwsEcsServiceUpdate(d *schema.ResourceData, meta interface{}) error
func resourceAwsEcsServiceDelete(d *schema.ResourceData, meta interface{}) error {
conn := meta.(*AWSClient).ecsconn

serviceName := d.Id()
clusterName := d.Get("cluster").(string)

// Check if it's not already gone
resp, err := conn.DescribeServices(&ecs.DescribeServicesInput{
Services: []*string{aws.String(d.Id())},
Cluster: aws.String(d.Get("cluster").(string)),
Services: []*string{aws.String(serviceName)},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ARNs are AFAIK the most specific ids and also may help us while debugging some issues related to region or account ID - imagine you would use wrong AWS credentials and then you'd be wondering why the ECS service is/isn't there or why it has different settings.

For those reasons I'd be personally inclined to keep ARNs where possible, especially in logs.

Cluster: aws.String(clusterName),
})
if err != nil {
return err
}

if len(resp.Services) == 0 {
log.Printf("[DEBUG] ECS Service %q is already gone", d.Id())
return nil
}

log.Printf("[DEBUG] ECS service %s is currently %s", d.Id(), *resp.Services[0].Status)
log.Printf("[DEBUG] ECS service %s is currently %s", serviceName, *resp.Services[0].Status)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for removing this? I'm aware that ECS services typically remain in INACTIVE state, so it may look like useless 4 lines of code, but imagine a situation where someone has manually removed the service outside of Terraform a long time ago. If we won't have this condition here, the code below would cause crash since it's expecting non-empty list.

if *resp.Services[0].Status == "INACTIVE" {
return nil
Expand All @@ -265,8 +262,8 @@ func resourceAwsEcsServiceDelete(d *schema.ResourceData, meta interface{}) error
if *resp.Services[0].Status != "DRAINING" {
log.Printf("[DEBUG] Draining ECS service %s", d.Id())
_, err = conn.UpdateService(&ecs.UpdateServiceInput{
Service: aws.String(d.Id()),
Cluster: aws.String(d.Get("cluster").(string)),
Service: aws.String(serviceName),
Cluster: aws.String(clusterName),
DesiredCount: aws.Int64(int64(0)),
})
if err != nil {
Expand All @@ -275,8 +272,8 @@ func resourceAwsEcsServiceDelete(d *schema.ResourceData, meta interface{}) error
}

input := ecs.DeleteServiceInput{
Service: aws.String(d.Id()),
Cluster: aws.String(d.Get("cluster").(string)),
Service: aws.String(serviceName),
Cluster: aws.String(clusterName),
}

log.Printf("[DEBUG] Deleting ECS service %s", input)
Expand All @@ -292,16 +289,48 @@ func resourceAwsEcsServiceDelete(d *schema.ResourceData, meta interface{}) error
Timeout: 5 * time.Minute,
MinTimeout: 1 * time.Second,
Refresh: func() (interface{}, string, error) {
log.Printf("[DEBUG] Checking if ECS service %s is INACTIVE", d.Id())
// The DescribeServices returns "DRAINING" for several hours/days
// even when the service is actually deleted and no longer visible
// in the cluster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 I appreciate the comment here!

I can report this to AWS, unless you've done it already.

log.Printf("[DEBUG] Check if ECS service %s exists in the cluster %s", serviceName, clusterName)
servicesListResp, servicesListErr := conn.ListServices(&ecs.ListServicesInput{
Cluster: aws.String(clusterName),
})

if servicesListErr != nil {
return servicesListErr, "FAILED", servicesListErr
}

foundService := false
for _, serviceArn := range servicesListResp.ServiceArns {
foundServiceName := strings.Split(*serviceArn, "/")[1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also another reason why keep ARNs instead of names. 😉

if foundServiceName == serviceName {
foundService = true
break
}
}

if !foundService {
return servicesListResp, "INACTIVE", nil
}

log.Printf("[DEBUG] Checking if ECS service %s is INACTIVE or MISSING", d.Id())
resp, err := conn.DescribeServices(&ecs.DescribeServicesInput{
Services: []*string{aws.String(d.Id())},
Cluster: aws.String(d.Get("cluster").(string)),
Services: []*string{aws.String(serviceName)},
Cluster: aws.String(clusterName),
})
if err != nil {
return resp, "FAILED", err
}

return resp, *resp.Services[0].Status, nil
status := *resp.Services[0].Status
log.Printf("[DEBUG] ECS service %s is %s", serviceName, status)

// both these statuses means that the service is deleted
if status == "MISSING" {
status = "INACTIVE"
}
return resp, status, nil
},
}

Expand Down