Skip to content

Commit

Permalink
data/aws/vpc: Only create subnet infrastructure for zones with instances
Browse files Browse the repository at this point in the history
This commit updates our Terraform variables to include the worker
subnets, and then switches on that (and the master zones) in Terraform
to avoid creating subnet infrastructure (NAT gateways, routes, etc.)
in zones that have no instances.  This helps address limit issues in
high-zone regions like us-east-1, as seen in the limits.md change.

The drawback is that users are now on the hook to provision their own
subnets in other zones if they decide that they want to grow into a
new zone as a day-2 Machine(Set) operation.  For now, they'll have to
provide their own infrastructure for that, and our
user-provided-infrastructure docs should give them sufficient
grounding to do so.  It's possible that in the future the machine-API
or other infrastructure operator could dynamically provision subnets
in zones that were not populated at install-time, but I can't hazard a
guess as to how likely that will be.
  • Loading branch information
wking committed Mar 27, 2019
1 parent db872bd commit 009963e
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 51 deletions.
7 changes: 4 additions & 3 deletions data/data/aws/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ module "dns" {
module "vpc" {
source = "./vpc"

cidr_block = "${var.machine_cidr}"
cluster_id = "${var.cluster_id}"
region = "${var.aws_region}"
cidr_block = "${var.machine_cidr}"
cluster_id = "${var.cluster_id}"
region = "${var.aws_region}"
availability_zones = "${distinct(concat(var.aws_master_availability_zones, var.aws_worker_availability_zones))}"

tags = "${local.tags}"
}
Expand Down
5 changes: 5 additions & 0 deletions data/data/aws/variables-aws.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,8 @@ variable "aws_master_availability_zones" {
type = "list"
description = "The availability zones in which to create the masters. The length of this list must match master_count."
}

# Zones listed here are merged (deduplicated) with aws_master_availability_zones
# in main.tf and passed to the vpc module as its availability_zones input.
variable "aws_worker_availability_zones" {
type = "list"
description = "The availability zones to provision for workers. Worker instances are created by the machine-API operator, but this variable controls their supporting infrastructure (subnets, routing, etc.)."
}
11 changes: 2 additions & 9 deletions data/data/aws/vpc/common.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
# Canonical internal state definitions for this module.
# read only: only locals and data source definitions allowed. No resources or module blocks in this file
// Fetch a list of available AZs
data "aws_availability_zones" "azs" {
state = "available"
}

// Only reference data sources which are gauranteed to exist at any time (above) in this locals{} block
// Only reference data sources which are guaranteed to exist at any time (above) in this locals{} block
locals {
// List of possible AZs for each type of subnet
new_subnet_azs = "${data.aws_availability_zones.azs.names}"

// How many AZs to create subnets in
new_az_count = "${length(local.new_subnet_azs)}"
new_az_count = "${length(var.availability_zones)}"

// The VPC ID to use to build the rest of the vpc data sources
vpc_id = "${aws_vpc.new_vpc.id}"
Expand Down
4 changes: 2 additions & 2 deletions data/data/aws/vpc/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ output "vpc_id" {
}

output "az_to_private_subnet_id" {
value = "${zipmap(local.new_subnet_azs, local.private_subnet_ids)}"
value = "${zipmap(var.availability_zones, local.private_subnet_ids)}"
}

output "az_to_public_subnet_id" {
value = "${zipmap(local.new_subnet_azs, local.public_subnet_ids)}"
value = "${zipmap(var.availability_zones, local.public_subnet_ids)}"
}

output "public_subnet_ids" {
Expand Down
5 changes: 5 additions & 0 deletions data/data/aws/vpc/variables.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Drives the per-zone resources in this module: new_az_count is the length of
# this list, and the subnet, route table, EIP and NAT gateway at count.index
# are named after (and, for subnets, placed in) the zone at that same index.
variable "availability_zones" {
type = "list"
description = "The availability zones in which to provision subnets."
}

variable "cidr_block" {
type = "string"
}
Expand Down
6 changes: 3 additions & 3 deletions data/data/aws/vpc/vpc-private.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ resource "aws_route_table" "private_routes" {
vpc_id = "${data.aws_vpc.cluster_vpc.id}"

tags = "${merge(map(
"Name","${var.cluster_id}-private-${local.new_subnet_azs[count.index]}",
"Name","${var.cluster_id}-private-${var.availability_zones[count.index]}",
), var.tags)}"
}

Expand All @@ -22,10 +22,10 @@ resource "aws_subnet" "private_subnet" {

cidr_block = "${cidrsubnet(local.new_private_cidr_range, 3, count.index)}"

availability_zone = "${local.new_subnet_azs[count.index]}"
availability_zone = "${var.availability_zones[count.index]}"

tags = "${merge(map(
"Name", "${var.cluster_id}-private-${local.new_subnet_azs[count.index]}",
"Name", "${var.cluster_id}-private-${var.availability_zones[count.index]}",
"kubernetes.io/role/internal-elb", "",
), var.tags)}"
}
Expand Down
8 changes: 4 additions & 4 deletions data/data/aws/vpc/vpc-public.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ resource "aws_subnet" "public_subnet" {

cidr_block = "${cidrsubnet(local.new_public_cidr_range, 3, count.index)}"

availability_zone = "${local.new_subnet_azs[count.index]}"
availability_zone = "${var.availability_zones[count.index]}"

tags = "${merge(map(
"Name", "${var.cluster_id}-public-${local.new_subnet_azs[count.index]}",
"Name", "${var.cluster_id}-public-${var.availability_zones[count.index]}",
), var.tags)}"
}

Expand All @@ -49,7 +49,7 @@ resource "aws_eip" "nat_eip" {
vpc = true

tags = "${merge(map(
"Name", "${var.cluster_id}-eip-${local.new_subnet_azs[count.index]}",
"Name", "${var.cluster_id}-eip-${var.availability_zones[count.index]}",
), var.tags)}"

# Terraform does not declare an explicit dependency towards the internet gateway.
Expand All @@ -64,6 +64,6 @@ resource "aws_nat_gateway" "nat_gw" {
subnet_id = "${aws_subnet.public_subnet.*.id[count.index]}"

tags = "${merge(map(
"Name", "${var.cluster_id}-nat-${local.new_subnet_azs[count.index]}",
"Name", "${var.cluster_id}-nat-${var.availability_zones[count.index]}",
), var.tags)}"
}
18 changes: 10 additions & 8 deletions docs/user/aws/limits.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,22 @@ limit.

## Elastic Network Interfaces (ENI)

The default installation creates 21 + the number of availability zones of ENIs (e.g. us-east-1 = 21 + 6 = 27 ENIs).
The default installation creates 21 ENIs plus one for each availability zone (e.g. 21 + 3 = 24 ENIs for a three-zone cluster).
The default limit per region is 350. Additional ENIs are created for additional machines and elastic load balancers
created by cluster usage and deployed workloads. A service limit increase here may be required to satisfy the needs of
additional clusters and deployed workloads.

## Elastic IP (EIP)

For a single, default cluster, your account will have the needed capacity limits required. There is one exception,
"EC2-VPC Elastic IPs". The installer creates a public and private subnet for each
[availability zone within a region][availability-zones] to provision the cluster in a highly available configuration. In
each private subnet, a separate [NAT Gateway][nat-gateways] is created and requires a separate [elastic IP][elastic-ip].
The default limit of 5 is sufficient for most regions and a single cluster. For the us-east-1 region, a higher limit is
required. For multiple clusters, a higher limit is required. Please see [this map][az-map] for a current region map with
availability zone count. We recommend selecting regions with 3 or more availability zones.
By default, the installer distributes control-plane and compute machines across [availability zones within a region][availability-zones] to provision the cluster in a highly available configuration.
Please see [this map][az-map] for a current region map with availability zone count.
We recommend selecting regions with 3 or more availability zones.
You can [provide an install-config](../overview.md#multiple-invocations) to [configure](customization.md) the installer to use specific zones to override that default.

The installer creates a public and private subnet for each availability zone that will contain machines.
For each of these zones, a separate [NAT Gateway][nat-gateways] is created in the zone's public subnet (to serve the private subnet) and requires a separate [EC2-VPC Elastic IP (EIP)][elastic-ip].
The default limit of 5 is sufficient for a single cluster, unless you have configured your cluster to use more than five zones.
For multiple clusters, a higher limit will likely be required (and will certainly be required to support more than five single-zone clusters).

### Example: Using N. Virginia (us-east-1)

Expand Down
14 changes: 12 additions & 2 deletions pkg/asset/cluster/tfvars.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (t *TerraformVariables) Dependencies() []asset.Asset {
&bootstrap.Bootstrap{},
&machine.Master{},
&machines.Master{},
&machines.Worker{},
}
}

Expand All @@ -71,8 +72,9 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error {
bootstrapIgnAsset := &bootstrap.Bootstrap{}
masterIgnAsset := &machine.Master{}
mastersAsset := &machines.Master{}
workersAsset := &machines.Worker{}
rhcosImage := new(rhcos.Image)
parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, rhcosImage)
parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, workersAsset, rhcosImage)

platform := installConfig.Config.Platform.Name()
switch platform {
Expand Down Expand Up @@ -117,7 +119,15 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error {
for i, m := range masters {
masterConfigs[i] = m.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig)
}
data, err := awstfvars.TFVars(masterConfigs)
workers, err := machines.Machines(workersAsset.MachineSetFiles)
if err != nil {
return err
}
workerConfigs := make([]*awsprovider.AWSMachineProviderConfig, len(workers))
for i, m := range workers {
workerConfigs[i] = m.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig)
}
data, err := awstfvars.TFVars(masterConfigs, workerConfigs)
if err != nil {
return errors.Wrapf(err, "failed to get %s Terraform variables", platform)
}
Expand Down
54 changes: 34 additions & 20 deletions pkg/tfvars/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,51 @@ package aws
import (
"encoding/json"
"fmt"
"sort"

"github.com/openshift/installer/pkg/types/aws/defaults"
"github.com/pkg/errors"
"sigs.k8s.io/cluster-api-provider-aws/pkg/apis/awsproviderconfig/v1beta1"
)

type config struct {
AMI string `json:"aws_ami"`
ExtraTags map[string]string `json:"aws_extra_tags,omitempty"`
BootstrapInstanceType string `json:"aws_bootstrap_instance_type,omitempty"`
MasterInstanceType string `json:"aws_master_instance_type,omitempty"`
AvailabilityZones []string `json:"aws_master_availability_zones"`
IOPS int64 `json:"aws_master_root_volume_iops"`
Size int64 `json:"aws_master_root_volume_size,omitempty"`
Type string `json:"aws_master_root_volume_type,omitempty"`
Region string `json:"aws_region,omitempty"`
AMI string `json:"aws_ami"`
ExtraTags map[string]string `json:"aws_extra_tags,omitempty"`
BootstrapInstanceType string `json:"aws_bootstrap_instance_type,omitempty"`
MasterInstanceType string `json:"aws_master_instance_type,omitempty"`
MasterAvailabilityZones []string `json:"aws_master_availability_zones"`
WorkerAvailabilityZones []string `json:"aws_worker_availability_zones"`
IOPS int64 `json:"aws_master_root_volume_iops"`
Size int64 `json:"aws_master_root_volume_size,omitempty"`
Type string `json:"aws_master_root_volume_type,omitempty"`
Region string `json:"aws_region,omitempty"`
}

// TFVars generates AWS-specific Terraform variables for launching the cluster.
func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig, workerConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
masterConfig := masterConfigs[0]

tags := make(map[string]string, len(masterConfig.Tags))
for _, tag := range masterConfig.Tags {
tags[tag.Name] = tag.Value
}

availabilityZones := make([]string, len(masterConfigs))
masterAvailabilityZones := make([]string, len(masterConfigs))
for i, c := range masterConfigs {
availabilityZones[i] = c.Placement.AvailabilityZone
masterAvailabilityZones[i] = c.Placement.AvailabilityZone
}

exists := struct{}{}
availabilityZoneMap := map[string]struct{}{}
for _, c := range workerConfigs {
availabilityZoneMap[c.Placement.AvailabilityZone] = exists
}
workerAvailabilityZones := make([]string, 0, len(availabilityZoneMap))
for zone := range availabilityZoneMap {
workerAvailabilityZones = append(workerAvailabilityZones, zone)
}
sort.Strings(workerAvailabilityZones)

if len(masterConfig.BlockDevices) == 0 {
return nil, errors.New("block device slice cannot be empty")
}
Expand All @@ -60,14 +73,15 @@ func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
instanceClass := defaults.InstanceClass(masterConfig.Placement.Region)

cfg := &config{
Region: masterConfig.Placement.Region,
ExtraTags: tags,
AMI: *masterConfig.AMI.ID,
AvailabilityZones: availabilityZones,
BootstrapInstanceType: fmt.Sprintf("%s.large", instanceClass),
MasterInstanceType: masterConfig.InstanceType,
Size: *rootVolume.EBS.VolumeSize,
Type: *rootVolume.EBS.VolumeType,
Region: masterConfig.Placement.Region,
ExtraTags: tags,
AMI: *masterConfig.AMI.ID,
MasterAvailabilityZones: masterAvailabilityZones,
WorkerAvailabilityZones: workerAvailabilityZones,
BootstrapInstanceType: fmt.Sprintf("%s.large", instanceClass),
MasterInstanceType: masterConfig.InstanceType,
Size: *rootVolume.EBS.VolumeSize,
Type: *rootVolume.EBS.VolumeType,
}

if rootVolume.EBS.Iops != nil {
Expand Down

0 comments on commit 009963e

Please sign in to comment.