Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

r/aws_vpc_endpoint_subnet_association: Fix issue with concurrent calls to 'ModifyVpcEndpoint' #3418

Merged
merged 6 commits into from
Jul 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 37 additions & 21 deletions aws/resource_aws_vpc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ func resourceAwsVpcEndpoint() *schema.Resource {
Optional: true,
},
},

Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(10 * time.Minute),
Update: schema.DefaultTimeout(10 * time.Minute),
Delete: schema.DefaultTimeout(10 * time.Minute),
},
}
}

Expand Down Expand Up @@ -155,7 +161,7 @@ func resourceAwsVpcEndpointCreate(d *schema.ResourceData, meta interface{}) erro
log.Printf("[DEBUG] Creating VPC Endpoint: %#v", req)
resp, err := conn.CreateVpcEndpoint(req)
if err != nil {
return fmt.Errorf("Error creating VPC Endpoint: %s", err.Error())
return fmt.Errorf("Error creating VPC Endpoint: %s", err)
}

vpce := resp.VpcEndpoint
Expand All @@ -167,7 +173,7 @@ func resourceAwsVpcEndpointCreate(d *schema.ResourceData, meta interface{}) erro
}
}

if err := vpcEndpointWaitUntilAvailable(d, conn); err != nil {
if err := vpcEndpointWaitUntilAvailable(conn, d.Id(), d.Timeout(schema.TimeoutCreate)); err != nil {
return err
}

Expand All @@ -179,7 +185,7 @@ func resourceAwsVpcEndpointRead(d *schema.ResourceData, meta interface{}) error

vpce, state, err := vpcEndpointStateRefresh(conn, d.Id())()
if err != nil && state != "failed" {
return fmt.Errorf("Error reading VPC Endpoint: %s", err.Error())
return fmt.Errorf("Error reading VPC Endpoint: %s", err)
}

terminalStates := map[string]bool{
Expand Down Expand Up @@ -234,10 +240,10 @@ func resourceAwsVpcEndpointUpdate(d *schema.ResourceData, meta interface{}) erro

log.Printf("[DEBUG] Updating VPC Endpoint: %#v", req)
if _, err := conn.ModifyVpcEndpoint(req); err != nil {
return fmt.Errorf("Error updating VPC Endpoint: %s", err.Error())
return fmt.Errorf("Error updating VPC Endpoint: %s", err)
}

if err := vpcEndpointWaitUntilAvailable(d, conn); err != nil {
if err := vpcEndpointWaitUntilAvailable(conn, d.Id(), d.Timeout(schema.TimeoutUpdate)); err != nil {
return err
}

Expand All @@ -255,20 +261,20 @@ func resourceAwsVpcEndpointDelete(d *schema.ResourceData, meta interface{}) erro
if isAWSErr(err, "InvalidVpcEndpointId.NotFound", "") {
log.Printf("[DEBUG] VPC Endpoint %s is already gone", d.Id())
} else {
return fmt.Errorf("Error deleting VPC Endpoint: %s", err.Error())
return fmt.Errorf("Error deleting VPC Endpoint: %s", err)
}
}

stateConf := &resource.StateChangeConf{
Pending: []string{"available", "pending", "deleting"},
Target: []string{"deleted"},
Refresh: vpcEndpointStateRefresh(conn, d.Id()),
Timeout: 10 * time.Minute,
Timeout: d.Timeout(schema.TimeoutDelete),
Delay: 5 * time.Second,
MinTimeout: 5 * time.Second,
}
if _, err = stateConf.WaitForState(); err != nil {
return fmt.Errorf("Error waiting for VPC Endpoint %s to delete: %s", d.Id(), err.Error())
return fmt.Errorf("Error waiting for VPC Endpoint (%s) to delete: %s", d.Id(), err)
}

return nil
Expand All @@ -284,7 +290,7 @@ func vpcEndpointAccept(conn *ec2.EC2, vpceId, svcName string) error {

describeSvcResp, err := conn.DescribeVpcEndpointServiceConfigurations(describeSvcReq)
if err != nil {
return fmt.Errorf("Error reading VPC Endpoint Service: %s", err.Error())
return fmt.Errorf("Error reading VPC Endpoint Service: %s", err)
}
if describeSvcResp == nil || len(describeSvcResp.ServiceConfigurations) == 0 {
return fmt.Errorf("No matching VPC Endpoint Service found")
Expand All @@ -298,7 +304,7 @@ func vpcEndpointAccept(conn *ec2.EC2, vpceId, svcName string) error {
log.Printf("[DEBUG] Accepting VPC Endpoint connection: %#v", acceptEpReq)
_, err = conn.AcceptVpcEndpointConnections(acceptEpReq)
if err != nil {
return fmt.Errorf("Error accepting VPC Endpoint connection: %s", err.Error())
return fmt.Errorf("Error accepting VPC Endpoint connection: %s", err)
}

return nil
Expand All @@ -312,33 +318,43 @@ func vpcEndpointStateRefresh(conn *ec2.EC2, vpceId string) resource.StateRefresh
})
if err != nil {
if isAWSErr(err, "InvalidVpcEndpointId.NotFound", "") {
return false, "deleted", nil
return "", "deleted", nil
}

return nil, "", err
}

vpce := resp.VpcEndpoints[0]
state := aws.StringValue(vpce.State)
// No use in retrying if the endpoint is in a failed state.
if state == "failed" {
return nil, state, errors.New("VPC Endpoint is in a failed state")
n := len(resp.VpcEndpoints)
switch n {
case 0:
return "", "deleted", nil

case 1:
vpce := resp.VpcEndpoints[0]
state := aws.StringValue(vpce.State)
// No use in retrying if the endpoint is in a failed state.
if state == "failed" {
return nil, state, errors.New("VPC Endpoint is in a failed state")
}
return vpce, state, nil

default:
return nil, "", fmt.Errorf("Found %d VPC Endpoints for %s, expected 1", n, vpceId)
}
return vpce, state, nil
}
}

func vpcEndpointWaitUntilAvailable(d *schema.ResourceData, conn *ec2.EC2) error {
func vpcEndpointWaitUntilAvailable(conn *ec2.EC2, vpceId string, timeout time.Duration) error {
stateConf := &resource.StateChangeConf{
Pending: []string{"pending"},
Target: []string{"available", "pendingAcceptance"},
Refresh: vpcEndpointStateRefresh(conn, d.Id()),
Timeout: 10 * time.Minute,
Refresh: vpcEndpointStateRefresh(conn, vpceId),
Timeout: timeout,
Delay: 5 * time.Second,
MinTimeout: 5 * time.Second,
}
if _, err := stateConf.WaitForState(); err != nil {
return fmt.Errorf("Error waiting for VPC Endpoint %s to become available: %s", d.Id(), err.Error())
return fmt.Errorf("Error waiting for VPC Endpoint (%s) to become available: %s", vpceId, err)
}

return nil
Expand Down
50 changes: 39 additions & 11 deletions aws/resource_aws_vpc_endpoint_subnet_association.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package aws
import (
"fmt"
"log"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/hashicorp/terraform/helper/hashcode"
"github.com/hashicorp/terraform/helper/resource"
"github.com/hashicorp/terraform/helper/schema"
)

Expand All @@ -32,6 +34,11 @@ func resourceAwsVpcEndpointSubnetAssociation() *schema.Resource {
ForceNew: true,
},
},

Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(10 * time.Minute),
Delete: schema.DefaultTimeout(10 * time.Minute),
},
}
}

Expand All @@ -46,15 +53,34 @@ func resourceAwsVpcEndpointSubnetAssociationCreate(d *schema.ResourceData, meta
return err
}

_, err = conn.ModifyVpcEndpoint(&ec2.ModifyVpcEndpointInput{
VpcEndpointId: aws.String(endpointId),
AddSubnetIds: aws.StringSlice([]string{snId}),
})
// See https://github.com/terraform-providers/terraform-provider-aws/issues/3382.
// Prevent concurrent subnet association requests and delay between requests.
mk := "vpc_endpoint_subnet_association_" + endpointId
awsMutexKV.Lock(mk)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we prefer to implement solutions to this problem with resource.Retry() instead of a mutex for a few reasons:

  • Handles cases where the concurrent modification might be happening outside Terraform
  • Better handles EC2 eventual consistency as we do not want to guess about the potential delay (it might be shorter than a minute for the API call to work)
  • Slightly simpler to understand logic (you just simply declare what error(s) you are retrying on with a timeout)

What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll have to refresh my memory of what the underlying EC2 API's behavior is in this case.
I think the behavior was that the ModifyVpcEndpoint API call just hangs waiting for a response in these situations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, it'd definitely be good to check the behavior on that from the debug logs. If that is the case, where I'm guessing the SDK is automatically retrying when it probably shouldn't, we should probably fix that in by adding a handler in config.go. 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I manually perform the test steps in the AWS console I notice that the VPC Endpoint goes into a pending state while the subnets are being associated. The code doesn't take that into consideration. I'm going to add a WaitForState call and see if that improves things.

defer awsMutexKV.Unlock(mk)

c := &resource.StateChangeConf{
Delay: 1 * time.Minute,
Timeout: 3 * time.Minute,
Target: []string{"ok"},
Refresh: func() (interface{}, string, error) {
res, err := conn.ModifyVpcEndpoint(&ec2.ModifyVpcEndpointInput{
VpcEndpointId: aws.String(endpointId),
AddSubnetIds: aws.StringSlice([]string{snId}),
})
return res, "ok", err
},
}
_, err = c.WaitForState()
if err != nil {
return fmt.Errorf("Error creating Vpc Endpoint/Subnet association: %s", err.Error())
return fmt.Errorf("Error creating Vpc Endpoint/Subnet association: %s", err)
}

d.SetId(vpcEndpointIdSubnetIdHash(endpointId, snId))
d.SetId(vpcEndpointSubnetAssociationId(endpointId, snId))

if err := vpcEndpointWaitUntilAvailable(conn, endpointId, d.Timeout(schema.TimeoutCreate)); err != nil {
return err
}

return resourceAwsVpcEndpointSubnetAssociationRead(d, meta)
}
Expand Down Expand Up @@ -105,24 +131,26 @@ func resourceAwsVpcEndpointSubnetAssociationDelete(d *schema.ResourceData, meta
if err != nil {
ec2err, ok := err.(awserr.Error)
if !ok {
return fmt.Errorf("Error deleting Vpc Endpoint/Subnet association: %s", err.Error())
return fmt.Errorf("Error deleting Vpc Endpoint/Subnet association: %s", err)
}

switch ec2err.Code() {
case "InvalidVpcEndpointId.NotFound":
fallthrough
case "InvalidRouteTableId.NotFound":
fallthrough
case "InvalidParameter":
log.Printf("[DEBUG] Vpc Endpoint/Subnet association is already gone")
default:
return fmt.Errorf("Error deleting Vpc Endpoint/Subnet association: %s", err.Error())
return fmt.Errorf("Error deleting Vpc Endpoint/Subnet association: %s", err)
}
}

if err := vpcEndpointWaitUntilAvailable(conn, endpointId, d.Timeout(schema.TimeoutDelete)); err != nil {
return err
}

return nil
}

func vpcEndpointIdSubnetIdHash(endpointId, snId string) string {
func vpcEndpointSubnetAssociationId(endpointId, snId string) string {
return fmt.Sprintf("a-%s%d", endpointId, hashcode.String(snId))
}
91 changes: 79 additions & 12 deletions aws/resource_aws_vpc_endpoint_subnet_association_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,29 @@ func TestAccAWSVpcEndpointSubnetAssociation_basic(t *testing.T) {
})
}

func TestAccAWSVpcEndpointSubnetAssociation_multiple(t *testing.T) {
var vpce ec2.VpcEndpoint

resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckVpcEndpointSubnetAssociationDestroy,
Steps: []resource.TestStep{
resource.TestStep{
Config: testAccVpcEndpointSubnetAssociationConfig_multiple,
Check: resource.ComposeTestCheckFunc(
testAccCheckVpcEndpointSubnetAssociationExists(
"aws_vpc_endpoint_subnet_association.a.0", &vpce),
testAccCheckVpcEndpointSubnetAssociationExists(
"aws_vpc_endpoint_subnet_association.a.1", &vpce),
testAccCheckVpcEndpointSubnetAssociationExists(
"aws_vpc_endpoint_subnet_association.a.2", &vpce),
),
},
},
})
}

func testAccCheckVpcEndpointSubnetAssociationDestroy(s *terraform.State) error {
conn := testAccProvider.Meta().(*AWSClient).ec2conn

Expand Down Expand Up @@ -103,10 +126,46 @@ func testAccCheckVpcEndpointSubnetAssociationExists(n string, vpce *ec2.VpcEndpo
}

const testAccVpcEndpointSubnetAssociationConfig_basic = `
provider "aws" {
region = "us-west-2"
resource "aws_vpc" "foo" {
cidr_block = "10.0.0.0/16"
tags {
Name = "terraform-testacc-vpc-endpoint-subnet-association"
}
}

data "aws_security_group" "default" {
vpc_id = "${aws_vpc.foo.id}"
name = "default"
}

data "aws_region" "current" {}

data "aws_availability_zones" "available" {}

resource "aws_vpc_endpoint" "ec2" {
vpc_id = "${aws_vpc.foo.id}"
vpc_endpoint_type = "Interface"
service_name = "com.amazonaws.${data.aws_region.current.name}.ec2"
security_group_ids = ["${data.aws_security_group.default.id}"]
private_dns_enabled = false
}

resource "aws_subnet" "sn" {
vpc_id = "${aws_vpc.foo.id}"
availability_zone = "${data.aws_availability_zones.available.names[0]}"
cidr_block = "10.0.0.0/17"
tags {
Name = "tf-acc-vpc-endpoint-subnet-association"
}
}

resource "aws_vpc_endpoint_subnet_association" "a" {
vpc_endpoint_id = "${aws_vpc_endpoint.ec2.id}"
subnet_id = "${aws_subnet.sn.id}"
}
`

const testAccVpcEndpointSubnetAssociationConfig_multiple = `
resource "aws_vpc" "foo" {
cidr_block = "10.0.0.0/16"
tags {
Expand All @@ -116,28 +175,36 @@ resource "aws_vpc" "foo" {

data "aws_security_group" "default" {
vpc_id = "${aws_vpc.foo.id}"
name = "default"
name = "default"
}

data "aws_region" "current" {}

data "aws_availability_zones" "available" {}

resource "aws_vpc_endpoint" "ec2" {
vpc_id = "${aws_vpc.foo.id}"
vpc_endpoint_type = "Interface"
service_name = "com.amazonaws.us-west-2.ec2"
security_group_ids = ["${data.aws_security_group.default.id}"]
vpc_id = "${aws_vpc.foo.id}"
vpc_endpoint_type = "Interface"
service_name = "com.amazonaws.${data.aws_region.current.name}.ec2"
security_group_ids = ["${data.aws_security_group.default.id}"]
private_dns_enabled = false
}

resource "aws_subnet" "sn" {
vpc_id = "${aws_vpc.foo.id}"
availability_zone = "us-west-2a"
cidr_block = "10.0.0.0/17"
count = 3

vpc_id = "${aws_vpc.foo.id}"
availability_zone = "${data.aws_availability_zones.available.names[count.index]}"
cidr_block = "${cidrsubnet(aws_vpc.foo.cidr_block, 2, count.index)}"
tags {
Name = "tf-acc-vpc-endpoint-subnet-association"
Name = "${format("tf-acc-vpc-endpoint-subnet-association-%d", count.index + 1)}"
}
}

resource "aws_vpc_endpoint_subnet_association" "a" {
count = 3

vpc_endpoint_id = "${aws_vpc_endpoint.ec2.id}"
subnet_id = "${aws_subnet.sn.id}"
subnet_id = "${aws_subnet.sn.*.id[count.index]}"
}
`
Loading