From ba76eea1f6fc12627aa2b1bc359b9e1d1f031c5b Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 27 Mar 2019 14:53:34 -0700 Subject: [PATCH] data/aws/vpc: Only create subnet infrastructure for zones with instances This commit updates our Terraform variables to include the worker availability zones, and then switches on that (and the master zones) in Terraform to avoid creating subnet infrastructure (NAT gateways, routes, etc.) in zones that have no instances. This helps address limit issues in high-zone regions like us-east-1, as seen in the limits.md change. The drawback is that users are now on the hook to provision their own subnets in other zones if they decide that they want to grow into a new zone as a day-2 Machine(Set) operation. For now, they'll have to provide their own infrastructure for that, and our user-provided-infrastructure docs should give them sufficient grounding to do so. It's possible that in the future the machine-API or other infrastructure operator could dynamically provision subnets in zones that were not populated at install-time, but I can't hazard a guess as to how likely that will be. The HCL functions for combining the zone lists are documented in [1,2]. 
[1]: https://www.terraform.io/docs/configuration-0-11/interpolation.html#concat-list1-list2- [2]: https://www.terraform.io/docs/configuration-0-11/interpolation.html#distinct-list- --- data/data/aws/main.tf | 7 +++-- data/data/aws/variables-aws.tf | 5 +++ data/data/aws/vpc/common.tf | 11 ++----- data/data/aws/vpc/outputs.tf | 4 +-- data/data/aws/vpc/variables.tf | 5 +++ data/data/aws/vpc/vpc-private.tf | 6 ++-- data/data/aws/vpc/vpc-public.tf | 8 ++--- docs/user/aws/limits.md | 18 ++++++----- pkg/asset/cluster/tfvars.go | 14 +++++++-- pkg/tfvars/aws/aws.go | 54 ++++++++++++++++++++------------ 10 files changed, 81 insertions(+), 51 deletions(-) diff --git a/data/data/aws/main.tf b/data/data/aws/main.tf index 2390ab054fb..fef5344e6cb 100644 --- a/data/data/aws/main.tf +++ b/data/data/aws/main.tf @@ -72,9 +72,10 @@ module "dns" { module "vpc" { source = "./vpc" - cidr_block = "${var.machine_cidr}" - cluster_id = "${var.cluster_id}" - region = "${var.aws_region}" + cidr_block = "${var.machine_cidr}" + cluster_id = "${var.cluster_id}" + region = "${var.aws_region}" + availability_zones = "${distinct(concat(var.aws_master_availability_zones, var.aws_worker_availability_zones))}" tags = "${local.tags}" } diff --git a/data/data/aws/variables-aws.tf b/data/data/aws/variables-aws.tf index 41f04cb7770..a2f67cbb141 100644 --- a/data/data/aws/variables-aws.tf +++ b/data/data/aws/variables-aws.tf @@ -62,3 +62,8 @@ variable "aws_master_availability_zones" { type = "list" description = "The availability zones in which to create the masters. The length of this list must match master_count." } + +variable "aws_worker_availability_zones" { + type = "list" + description = "The availability zones to provision for workers. Worker instances are created by the machine-API operator, but this variable controls their supporting infrastructure (subnets, routing, etc.)." 
+} diff --git a/data/data/aws/vpc/common.tf b/data/data/aws/vpc/common.tf index 52921df46bd..b8d6d3c4957 100644 --- a/data/data/aws/vpc/common.tf +++ b/data/data/aws/vpc/common.tf @@ -1,17 +1,10 @@ # Canonical internal state definitions for this module. # read only: only locals and data source definitions allowed. No resources or module blocks in this file -// Fetch a list of available AZs -data "aws_availability_zones" "azs" { - state = "available" -} -// Only reference data sources which are gauranteed to exist at any time (above) in this locals{} block +// Only reference data sources which are guaranteed to exist at any time (above) in this locals{} block locals { - // List of possible AZs for each type of subnet - new_subnet_azs = "${data.aws_availability_zones.azs.names}" - // How many AZs to create subnets in - new_az_count = "${length(local.new_subnet_azs)}" + new_az_count = "${length(var.availability_zones)}" // The VPC ID to use to build the rest of the vpc data sources vpc_id = "${aws_vpc.new_vpc.id}" diff --git a/data/data/aws/vpc/outputs.tf b/data/data/aws/vpc/outputs.tf index 48974ea4b1d..5daf08d93b3 100644 --- a/data/data/aws/vpc/outputs.tf +++ b/data/data/aws/vpc/outputs.tf @@ -3,11 +3,11 @@ output "vpc_id" { } output "az_to_private_subnet_id" { - value = "${zipmap(local.new_subnet_azs, local.private_subnet_ids)}" + value = "${zipmap(var.availability_zones, local.private_subnet_ids)}" } output "az_to_public_subnet_id" { - value = "${zipmap(local.new_subnet_azs, local.public_subnet_ids)}" + value = "${zipmap(var.availability_zones, local.public_subnet_ids)}" } output "public_subnet_ids" { diff --git a/data/data/aws/vpc/variables.tf b/data/data/aws/vpc/variables.tf index e2241af898b..59adeaed354 100644 --- a/data/data/aws/vpc/variables.tf +++ b/data/data/aws/vpc/variables.tf @@ -1,3 +1,8 @@ +variable "availability_zones" { + type = "list" + description = "The availability zones in which to provision subnets." 
+} + variable "cidr_block" { type = "string" } diff --git a/data/data/aws/vpc/vpc-private.tf b/data/data/aws/vpc/vpc-private.tf index 1df4a74d3a9..fa606b49cd3 100644 --- a/data/data/aws/vpc/vpc-private.tf +++ b/data/data/aws/vpc/vpc-private.tf @@ -3,7 +3,7 @@ resource "aws_route_table" "private_routes" { vpc_id = "${data.aws_vpc.cluster_vpc.id}" tags = "${merge(map( - "Name","${var.cluster_id}-private-${local.new_subnet_azs[count.index]}", + "Name","${var.cluster_id}-private-${var.availability_zones[count.index]}", ), var.tags)}" } @@ -22,10 +22,10 @@ resource "aws_subnet" "private_subnet" { cidr_block = "${cidrsubnet(local.new_private_cidr_range, 3, count.index)}" - availability_zone = "${local.new_subnet_azs[count.index]}" + availability_zone = "${var.availability_zones[count.index]}" tags = "${merge(map( - "Name", "${var.cluster_id}-private-${local.new_subnet_azs[count.index]}", + "Name", "${var.cluster_id}-private-${var.availability_zones[count.index]}", "kubernetes.io/role/internal-elb", "", ), var.tags)}" } diff --git a/data/data/aws/vpc/vpc-public.tf b/data/data/aws/vpc/vpc-public.tf index c20e1f85fb2..1b60d15ba51 100644 --- a/data/data/aws/vpc/vpc-public.tf +++ b/data/data/aws/vpc/vpc-public.tf @@ -31,10 +31,10 @@ resource "aws_subnet" "public_subnet" { cidr_block = "${cidrsubnet(local.new_public_cidr_range, 3, count.index)}" - availability_zone = "${local.new_subnet_azs[count.index]}" + availability_zone = "${var.availability_zones[count.index]}" tags = "${merge(map( - "Name", "${var.cluster_id}-public-${local.new_subnet_azs[count.index]}", + "Name", "${var.cluster_id}-public-${var.availability_zones[count.index]}", ), var.tags)}" } @@ -49,7 +49,7 @@ resource "aws_eip" "nat_eip" { vpc = true tags = "${merge(map( - "Name", "${var.cluster_id}-eip-${local.new_subnet_azs[count.index]}", + "Name", "${var.cluster_id}-eip-${var.availability_zones[count.index]}", ), var.tags)}" # Terraform does not declare an explicit dependency towards the internet gateway. 
@@ -64,6 +64,6 @@ resource "aws_nat_gateway" "nat_gw" { subnet_id = "${aws_subnet.public_subnet.*.id[count.index]}" tags = "${merge(map( - "Name", "${var.cluster_id}-nat-${local.new_subnet_azs[count.index]}", + "Name", "${var.cluster_id}-nat-${var.availability_zones[count.index]}", ), var.tags)}" } diff --git a/docs/user/aws/limits.md b/docs/user/aws/limits.md index 1182eca1d03..88a8675143d 100644 --- a/docs/user/aws/limits.md +++ b/docs/user/aws/limits.md @@ -23,20 +23,22 @@ limit. ## Elastic Network Interfaces (ENI) -The default installation creates 21 + the number of availability zones of ENIs (e.g. us-east-1 = 21 + 6 = 27 ENIs). +The default installation creates 21 + the number of availability zones of ENIs (e.g. 21 + 3 = 24 ENIs for a three-zone cluster). The default limit per region is 350. Additional ENIs are created for additional machines and elastic load balancers created by cluster usage and deployed workloads. A service limit increase here may be required to satisfy the needs of additional clusters and deployed workloads. ## Elastic IP (EIP) -For a single, default cluster, your account will have the needed capacity limits required. There is one exception, -"EC2-VPC Elastic IPs". The installer creates a public and private subnet for each -[availability zone within a region][availability-zones] to provision the cluster in a highly available configuration. In -each private subnet, a separate [NAT Gateway][nat-gateways] is created and requires a separate [elastic IP][elastic-ip]. -The default limit of 5 is sufficient for most regions and a single cluster. For the us-east-1 region, a higher limit is -required. For multiple clusters, a higher limit is required. Please see [this map][az-map] for a current region map with -availability zone count. We recommend selecting regions with 3 or more availability zones. 
+By default, the installer distributes control-plane and compute machines across [availability zones within a region][availability-zones] to provision the cluster in a highly available configuration. +Please see [this map][az-map] for a current region map with availability zone count. +We recommend selecting regions with 3 or more availability zones. +You can [provide an install-config](../overview.md#multiple-invocations) to [configure](customization.md) the installer to use specific zones to override that default. + +The installer creates a public and private subnet for each availability zone that will contain machines. +In each private subnet, a separate [NAT Gateway][nat-gateways] is created and requires a separate [EC2-VPC Elastic IP (EIP)][elastic-ip]. +The default limit of 5 is sufficient for a single cluster, unless you have configured your cluster to use more than five zones. +For multiple clusters, a higher limit will likely be required (and will certainly be required to support more than five single-zone clusters). ### Example: Using N. 
Virginia (us-east-1) diff --git a/pkg/asset/cluster/tfvars.go b/pkg/asset/cluster/tfvars.go index 9de5e8be5fa..4be7ddb95cf 100644 --- a/pkg/asset/cluster/tfvars.go +++ b/pkg/asset/cluster/tfvars.go @@ -61,6 +61,7 @@ func (t *TerraformVariables) Dependencies() []asset.Asset { &bootstrap.Bootstrap{}, &machine.Master{}, &machines.Master{}, + &machines.Worker{}, } } @@ -71,8 +72,9 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error { bootstrapIgnAsset := &bootstrap.Bootstrap{} masterIgnAsset := &machine.Master{} mastersAsset := &machines.Master{} + workersAsset := &machines.Worker{} rhcosImage := new(rhcos.Image) - parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, rhcosImage) + parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, workersAsset, rhcosImage) platform := installConfig.Config.Platform.Name() switch platform { @@ -118,7 +120,15 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error { for i, m := range masters { masterConfigs[i] = m.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig) } - data, err := awstfvars.TFVars(masterConfigs) + workers, err := workersAsset.MachineSets() + if err != nil { + return err + } + workerConfigs := make([]*awsprovider.AWSMachineProviderConfig, len(workers)) + for i, m := range workers { + workerConfigs[i] = m.Spec.Template.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig) + } + data, err := awstfvars.TFVars(masterConfigs, workerConfigs) if err != nil { return errors.Wrapf(err, "failed to get %s Terraform variables", platform) } diff --git a/pkg/tfvars/aws/aws.go b/pkg/tfvars/aws/aws.go index 456eba934e1..bc599d6a73f 100644 --- a/pkg/tfvars/aws/aws.go +++ b/pkg/tfvars/aws/aws.go @@ -4,6 +4,7 @@ package aws import ( "encoding/json" "fmt" + "sort" "github.com/openshift/installer/pkg/types/aws/defaults" "github.com/pkg/errors" @@ -11,19 +12,20 @@ import ( ) type config struct { - AMI string 
`json:"aws_ami"` - ExtraTags map[string]string `json:"aws_extra_tags,omitempty"` - BootstrapInstanceType string `json:"aws_bootstrap_instance_type,omitempty"` - MasterInstanceType string `json:"aws_master_instance_type,omitempty"` - AvailabilityZones []string `json:"aws_master_availability_zones"` - IOPS int64 `json:"aws_master_root_volume_iops"` - Size int64 `json:"aws_master_root_volume_size,omitempty"` - Type string `json:"aws_master_root_volume_type,omitempty"` - Region string `json:"aws_region,omitempty"` + AMI string `json:"aws_ami"` + ExtraTags map[string]string `json:"aws_extra_tags,omitempty"` + BootstrapInstanceType string `json:"aws_bootstrap_instance_type,omitempty"` + MasterInstanceType string `json:"aws_master_instance_type,omitempty"` + MasterAvailabilityZones []string `json:"aws_master_availability_zones"` + WorkerAvailabilityZones []string `json:"aws_worker_availability_zones"` + IOPS int64 `json:"aws_master_root_volume_iops"` + Size int64 `json:"aws_master_root_volume_size,omitempty"` + Type string `json:"aws_master_root_volume_type,omitempty"` + Region string `json:"aws_region,omitempty"` } // TFVars generates AWS-specific Terraform variables launching the cluster. 
-func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) { +func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig, workerConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) { masterConfig := masterConfigs[0] tags := make(map[string]string, len(masterConfig.Tags)) @@ -31,11 +33,22 @@ func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) { tags[tag.Name] = tag.Value } - availabilityZones := make([]string, len(masterConfigs)) + masterAvailabilityZones := make([]string, len(masterConfigs)) for i, c := range masterConfigs { - availabilityZones[i] = c.Placement.AvailabilityZone + masterAvailabilityZones[i] = c.Placement.AvailabilityZone } + exists := struct{}{} + availabilityZoneMap := map[string]struct{}{} + for _, c := range workerConfigs { + availabilityZoneMap[c.Placement.AvailabilityZone] = exists + } + workerAvailabilityZones := make([]string, 0, len(availabilityZoneMap)) + for zone := range availabilityZoneMap { + workerAvailabilityZones = append(workerAvailabilityZones, zone) + } + sort.Strings(workerAvailabilityZones) + if len(masterConfig.BlockDevices) == 0 { return nil, errors.New("block device slice cannot be empty") } @@ -60,14 +73,15 @@ func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) { instanceClass := defaults.InstanceClass(masterConfig.Placement.Region) cfg := &config{ - Region: masterConfig.Placement.Region, - ExtraTags: tags, - AMI: *masterConfig.AMI.ID, - AvailabilityZones: availabilityZones, - BootstrapInstanceType: fmt.Sprintf("%s.large", instanceClass), - MasterInstanceType: masterConfig.InstanceType, - Size: *rootVolume.EBS.VolumeSize, - Type: *rootVolume.EBS.VolumeType, + Region: masterConfig.Placement.Region, + ExtraTags: tags, + AMI: *masterConfig.AMI.ID, + MasterAvailabilityZones: masterAvailabilityZones, + WorkerAvailabilityZones: workerAvailabilityZones, + BootstrapInstanceType: fmt.Sprintf("%s.large", instanceClass), + MasterInstanceType: 
masterConfig.InstanceType, + Size: *rootVolume.EBS.VolumeSize, + Type: *rootVolume.EBS.VolumeType, } if rootVolume.EBS.Iops != nil {