Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add wait_for_steady_state attribute to aws_ecs_service #3485

Merged
merged 3 commits into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions aws/resource_aws_ecs_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ func resourceAwsEcsService() *schema.Resource {
},
},
"tags": tagsSchema(),

"wait_for_steady_state": {
Type: schema.TypeBool,
Optional: true,
Default: false,
},
},
}
}
Expand Down Expand Up @@ -528,6 +534,12 @@ func resourceAwsEcsServiceCreate(d *schema.ResourceData, meta interface{}) error
log.Printf("[DEBUG] ECS service created: %s", aws.StringValue(service.ServiceArn))
d.SetId(aws.StringValue(service.ServiceArn))

if d.Get("wait_for_steady_state").(bool) {
if err := waitForSteadyState(conn, d); err != nil {
return err
}
}

return resourceAwsEcsServiceRead(d, meta)
}

Expand All @@ -547,6 +559,9 @@ func resourceAwsEcsServiceRead(d *schema.ResourceData, meta interface{}) error {
var err error
out, err = conn.DescribeServices(&input)
if err != nil {
if d.IsNewResource() && isAWSErr(err, ecs.ErrCodeServiceNotFoundException, "") {
return resource.RetryableError(err)
}
return resource.NonRetryableError(err)
}

Expand Down Expand Up @@ -1032,6 +1047,12 @@ func resourceAwsEcsServiceUpdate(d *schema.ResourceData, meta interface{}) error
}
}

if d.Get("wait_for_steady_state").(bool) {
if err := waitForSteadyState(conn, d); err != nil {
return err
}
}

if d.HasChange("tags") {
o, n := d.GetChange("tags")

Expand Down Expand Up @@ -1161,3 +1182,17 @@ func buildFamilyAndRevisionFromARN(arn string) string {
func getNameFromARN(arn string) string {
return strings.Split(arn, "/")[1]
}

func waitForSteadyState(conn *ecs.ECS, d *schema.ResourceData) error {
input := &ecs.DescribeServicesInput{
Services: aws.StringSlice([]string{d.Id()}),
}
if v, ok := d.GetOk("cluster"); ok {
input.Cluster = aws.String(v.(string))
}

if err := conn.WaitUntilServicesStable(input); err != nil {
return fmt.Errorf("error waiting for service (%s) to reach a steady state: %w", d.Id(), err)
}
return nil
}
301 changes: 301 additions & 0 deletions aws/resource_aws_ecs_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,78 @@ func TestAccAWSEcsService_withLaunchTypeFargateAndPlatformVersion(t *testing.T)
})
}

func TestAccAWSEcsService_withLaunchTypeFargateAndWaitForSteadyState(t *testing.T) {
var service ecs.Service
resourceName := "aws_ecs_service.test"
rString := acctest.RandString(8)
rName := fmt.Sprintf("tf-acc-svc-w-ltf-ss-%s", rString)

resource.ParallelTest(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckAWSEcsServiceDestroy,
Steps: []resource.TestStep{
{
// Wait for the ECS Cluster to reach a steady state w/specified count
Config: testAccAWSEcsServiceWithLaunchTypeFargateAndWait(rName, 1, true),
Check: resource.ComposeTestCheckFunc(
testAccCheckAWSEcsServiceExists(resourceName, &service),
resource.TestCheckResourceAttr(resourceName, "desired_count", "1"),
resource.TestCheckResourceAttr(resourceName, "wait_for_steady_state", "true"),
),
},
{
ResourceName: resourceName,
ImportStateId: fmt.Sprintf("%s/%s", rName, rName),
ImportState: true,
ImportStateVerify: true,
},
},
})
}

func TestAccAWSEcsService_withLaunchTypeFargateAndUpdateWaitForSteadyState(t *testing.T) {
var service ecs.Service
resourceName := "aws_ecs_service.test"
rString := acctest.RandString(8)

rName := fmt.Sprintf("tf-acc-svc-w-ltf-ss-%s", rString)

resource.ParallelTest(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckAWSEcsServiceDestroy,
Steps: []resource.TestStep{
{
Config: testAccAWSEcsServiceWithLaunchTypeFargateWithoutWait(rName),
Check: resource.ComposeTestCheckFunc(
testAccCheckAWSEcsServiceExists(resourceName, &service),
resource.TestCheckResourceAttr(resourceName, "desired_count", "1"),
resource.TestCheckResourceAttr(resourceName, "wait_for_steady_state", "false"),
),
},
{
// Modify desired count and wait for the ECS Cluster to reach steady state
Config: testAccAWSEcsServiceWithLaunchTypeFargateAndWait(rName, 2, true),
Check: resource.ComposeTestCheckFunc(
testAccCheckAWSEcsServiceExists(resourceName, &service),
resource.TestCheckResourceAttr(resourceName, "desired_count", "2"),
resource.TestCheckResourceAttr(resourceName, "wait_for_steady_state", "true"),
),
},
{
// Modify desired count without wait
Config: testAccAWSEcsServiceWithLaunchTypeFargateAndWait(rName, 1, false),
Check: resource.ComposeTestCheckFunc(
testAccCheckAWSEcsServiceExists(resourceName, &service),
resource.TestCheckResourceAttr(resourceName, "desired_count", "1"),
resource.TestCheckResourceAttr(resourceName, "wait_for_steady_state", "false"),
),
},
},
})
}

func TestAccAWSEcsService_withLaunchTypeEC2AndNetworkConfiguration(t *testing.T) {
var service ecs.Service
rString := acctest.RandString(8)
Expand Down Expand Up @@ -1350,6 +1422,235 @@ resource "aws_ecs_service" "mongo" {
`, clusterName, tdName, svcName)
}

func testAccAWSEcsServiceWithLaunchTypeFargateWithoutWait(rName string) string {
return fmt.Sprintf(`
data "aws_availability_zones" "available" {
state = "available"

filter {
name = "opt-in-status"
values = ["opt-in-not-required"]
}
}

resource "aws_vpc" "test" {
cidr_block = "10.10.0.0/16"

tags = {
Name = "terraform-testacc-ecs-service-with-launch-type-fargate"
}
}

resource "aws_subnet" "test" {
count = 2
cidr_block = cidrsubnet(aws_vpc.test.cidr_block, 8, count.index)
availability_zone = data.aws_availability_zones.available.names[count.index]
vpc_id = aws_vpc.test.id

tags = {
Name = "tf-acc-ecs-service-with-launch-type-fargate"
}
}

resource "aws_internet_gateway" "test" {
vpc_id = aws_vpc.test.id
}

resource "aws_route_table" "test" {
vpc_id = aws_vpc.test.id

route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.test.id
}
}

resource "aws_route_table_association" "test" {
count = 2
subnet_id = element(aws_subnet.test.*.id, count.index)
route_table_id = aws_route_table.test.id
}

resource "aws_security_group" "test" {
name = %[1]q
description = "Allow traffic"
vpc_id = aws_vpc.test.id

ingress {
protocol = "6"
from_port = 80
to_port = 8000
cidr_blocks = [aws_vpc.test.cidr_block]
}

egress {
from_port = 0
to_port = 0
protocol = "-1"

cidr_blocks = [
"0.0.0.0/0",
]
}
}

resource "aws_ecs_cluster" "test" {
name = %[1]q
}

resource "aws_ecs_task_definition" "mongo" {
family = %[1]q
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = "256"
memory = "512"

container_definitions = <<DEFINITION
[
{
"cpu": 256,
"essential": true,
"image": "mongo:latest",
"memory": 512,
"name": "mongodb",
"networkMode": "awsvpc"
}
]
DEFINITION
}

resource "aws_ecs_service" "test" {
name = %[1]q
cluster = aws_ecs_cluster.test.id
task_definition = aws_ecs_task_definition.mongo.arn
desired_count = 1
launch_type = "FARGATE"

network_configuration {
security_groups = [aws_security_group.test.id]
subnets = aws_subnet.test[*].id
assign_public_ip = true
}
}
`, rName)
}

func testAccAWSEcsServiceWithLaunchTypeFargateAndWait(rName string, desiredCount int, waitForSteadyState bool) string {
return fmt.Sprintf(`
data "aws_availability_zones" "available" {
state = "available"

filter {
name = "opt-in-status"
values = ["opt-in-not-required"]
}
}

resource "aws_vpc" "test" {
cidr_block = "10.10.0.0/16"

tags = {
Name = "terraform-testacc-ecs-service-with-launch-type-fargate"
}
}

resource "aws_subnet" "test" {
count = 2
cidr_block = cidrsubnet(aws_vpc.test.cidr_block, 8, count.index)
availability_zone = data.aws_availability_zones.available.names[count.index]
vpc_id = aws_vpc.test.id

tags = {
Name = "tf-acc-ecs-service-with-launch-type-fargate"
}
}

resource "aws_internet_gateway" "test" {
vpc_id = aws_vpc.test.id
}

resource "aws_route_table" "test" {
vpc_id = aws_vpc.test.id

route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.test.id
}
}

resource "aws_route_table_association" "test" {
count = 2
subnet_id = element(aws_subnet.test.*.id, count.index)
route_table_id = aws_route_table.test.id
}
Comment on lines +1577 to +1594
Copy link
Contributor

@anGie44 anGie44 Oct 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

required to run task else receives the error CannotPullContainerError and task reaches STOPPED state


resource "aws_security_group" "test" {
name = %[1]q
description = "Allow traffic"
vpc_id = aws_vpc.test.id

ingress {
protocol = "6"
from_port = 80
to_port = 8000
cidr_blocks = [aws_vpc.test.cidr_block]
}

egress {
from_port = 0
to_port = 0
protocol = "-1"

cidr_blocks = [
"0.0.0.0/0",
]
}
}

resource "aws_ecs_cluster" "test" {
name = %[1]q
}

resource "aws_ecs_task_definition" "mongo" {
family = %[1]q
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = "256"
memory = "512"

container_definitions = <<DEFINITION
[
{
"cpu": 256,
"essential": true,
"image": "mongo:latest",
"memory": 512,
"name": "mongodb",
"networkMode": "awsvpc"
}
]
DEFINITION
}

resource "aws_ecs_service" "test" {
name = %[1]q
cluster = aws_ecs_cluster.test.id
task_definition = aws_ecs_task_definition.mongo.arn
desired_count = %d
launch_type = "FARGATE"

network_configuration {
security_groups = [aws_security_group.test.id]
subnets = aws_subnet.test[*].id
assign_public_ip = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

required for the task to reach RUNNING state -- else task throws error CannotPullContainerError: Error response from daemon:Get https://registry-name/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)

}

wait_for_steady_state = %t
}

`, rName, desiredCount, waitForSteadyState)
}

func testAccAWSEcsServiceWithInterchangeablePlacementStrategy(clusterName, tdName, svcName string) string {
return fmt.Sprintf(`
resource "aws_ecs_cluster" "default" {
Expand Down
1 change: 1 addition & 0 deletions website/docs/r/ecs_service.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ The following arguments are supported:
* `service_registries` - (Optional) The service discovery registries for the service. The maximum number of `service_registries` blocks is `1`.
* `tags` - (Optional) Key-value map of resource tags
* `task_definition` - (Optional) The family and revision (`family:revision`) or full ARN of the task definition that you want to run in your service. Required unless using the `EXTERNAL` deployment controller. If a revision is not specified, the latest `ACTIVE` revision is used.
* `wait_for_steady_state` - (Optional) If `true`, Terraform will wait for the service to reach a steady state (like [`aws ecs wait services-stable`](https://docs.aws.amazon.com/cli/latest/reference/ecs/wait/services-stable.html)) before continuing. Default `false`.
Copy link
Contributor Author

@mmdriley mmdriley Feb 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to write an acceptance test for when this is true, but I couldn't come up with a reliable, repeatable way to test we weren't hitting a race condition.

This would also be the first test case that required container instances (or which used Fargate and was locked to us-east-1).

I have verified manually that this waits for a new deployment to finish.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What race conditions were you thinking of?

I would have thought that you could simply create an ECS service in one step (using your wait_for_steady_state to make sure it doesn't complete until there is a task running somewhere) and then another step that increases the desired_count and then another step that changes the task definition by setting an environment variable or something.

Creating container instances shouldn't be difficult, I'd just use the latest ECS optimised AMI and set the user data so that it joins the ECS cluster you create for the acceptance test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The race condition I have in mind is the one we hit without this flag: in the case where the to-be-replaced ECS service definition references a resource that will be deleted, Terraform and ECS race and Terraform can delete the resource while there are still tasks referencing it.

The problem is finding a test that would reliably fail when this flag is unset and reliably pass when it is. The test you describe would pass without this flag, right?

The best idea I have for testing this is a service binary that takes N minutes to start responding to healthchecks, then verify that service update takes at least N minutes. Not great, and for small N it's entirely possible the test would pass without this flag if service update were slow.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we create an ALB and Target Group then the acceptance test can be something as simple as launching busybox sleep 600 in that service.

Since it will never pass health checks the service will never become stable.


## capacity_provider_strategy

Expand Down